merged default branch into stable
super-admin
r1219:e9ee7632 merge stable

The requested changes are too big and content was truncated.

@@ -0,0 +1,53 b''
1 ; #####################
2 ; LOGGING CONFIGURATION
3 ; #####################
4 ; Logging template, used to configure logging
5 ; some variables here are replaced by RhodeCode with default values
6
7 [loggers]
8 keys = root, vcsserver
9
10 [handlers]
11 keys = console
12
13 [formatters]
14 keys = generic, json
15
16 ; #######
17 ; LOGGERS
18 ; #######
19 [logger_root]
20 level = NOTSET
21 handlers = console
22
23 [logger_vcsserver]
24 level = $RC_LOGGING_LEVEL
25 handlers =
26 qualname = vcsserver
27 propagate = 1
28
29 ; ########
30 ; HANDLERS
31 ; ########
32
33 [handler_console]
34 class = StreamHandler
35 args = (sys.stderr, )
36 level = $RC_LOGGING_LEVEL
37 ; To enable JSON formatted logs replace generic with json
38 ; This allows sending properly formatted logs to grafana loki or elasticsearch
39 #formatter = json
40 #formatter = generic
41 formatter = $RC_LOGGING_FORMATTER
42
43 ; ##########
44 ; FORMATTERS
45 ; ##########
46
47 [formatter_generic]
48 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
49 datefmt = %Y-%m-%d %H:%M:%S
50
51 [formatter_json]
52 format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
53 class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
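
As an aside, the $RC_LOGGING_LEVEL and $RC_LOGGING_FORMATTER placeholders in this template are substituted with string.Template, mirroring what SettingsMaker.enable_logging does later in this commit. A minimal sketch, with the file path illustrative:

    import string

    with open('logging.ini') as f:   # path illustrative
        template = f.read()

    rendered = string.Template(template).safe_substitute(
        RC_LOGGING_LEVEL='DEBUG',
        RC_LOGGING_FORMATTER='json',
    )
    # `rendered` is now a plain logging ini, usable with logging.config.fileConfig
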
@@ -0,0 +1,73 b''
1 [build-system]
2 requires = ["setuptools>=61.0.0", "wheel"]
3 build-backend = "setuptools.build_meta"
4
5 [project]
6 name = "rhodecode-vcsserver"
7 description = "Version Control System Server for RhodeCode"
8 authors = [
9 {name = "RhodeCode GmbH", email = "support@rhodecode.com"},
10 ]
11
12 license = {text = "GPL V3"}
13 requires-python = ">=3.10"
14 dynamic = ["version", "readme", "dependencies", "optional-dependencies"]
15 classifiers = [
16 'Development Status :: 6 - Mature',
17 'Intended Audience :: Developers',
18 'Operating System :: OS Independent',
19 'Topic :: Software Development :: Version Control',
20 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
21 'Programming Language :: Python :: 3.10',
22 ]
23
24 [project.entry-points."paste.app_factory"]
25 main = "vcsserver.http_main:main"
26
27
28 [tool.setuptools]
29 packages = ["vcsserver"]
30
31 [tool.setuptools.dynamic]
32 readme = {file = ["README.rst"], content-type = "text/x-rst"}
33 version = {file = "vcsserver/VERSION"}
34 dependencies = {file = ["requirements.txt"]}
35 optional-dependencies.tests = {file = ["requirements_test.txt"]}
36
37 [tool.ruff]
38
39 select = [
40 # Pyflakes
41 "F",
42 # Pycodestyle
43 "E",
44 "W",
45 # isort
46 "I001"
47 ]
48
49 ignore = [
50 "E501", # line too long, handled by black
51 ]
52
53 # Same as Black.
54 line-length = 120
55
56 [tool.ruff.isort]
57
58 known-first-party = ["vcsserver"]
59
60 [tool.ruff.format]
61
62 # Like Black, use double quotes for strings.
63 quote-style = "double"
64
65 # Like Black, indent with spaces, rather than tabs.
66 indent-style = "space"
67
68 # Like Black, respect magic trailing commas.
69 skip-magic-trailing-comma = false
70
71 # Like Black, automatically detect the appropriate line ending.
72 line-ending = "auto"
73
@@ -0,0 +1,1 b''
1 # Copyright (C) 2014-2023 RhodeCode GmbH
@@ -0,0 +1,27 b''
1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 HOOK_REPO_SIZE = 'changegroup.repo_size'
20
21 # HG
22 HOOK_PRE_PULL = 'preoutgoing.pre_pull'
23 HOOK_PULL = 'outgoing.pull_logger'
24 HOOK_PRE_PUSH = 'prechangegroup.pre_push'
25 HOOK_PRETX_PUSH = 'pretxnchangegroup.pre_push'
26 HOOK_PUSH = 'changegroup.push_logger'
27 HOOK_PUSH_KEY = 'pushkey.key_push'
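
For context, these constants are the keys Mercurial expects in its [hooks] configuration section. A hypothetical sketch of wiring them up; the python: handler paths are illustrative, not part of this changeset:

    # hypothetical mapping of hook names to handlers; handler paths illustrative
    hgrc_hooks = {
        HOOK_PRE_PULL: 'python:vcsserver.hooks.pre_pull',
        HOOK_PUSH: 'python:vcsserver.hooks.push_logger',
    }
    # rendered as an hgrc [hooks] section
    print('[hooks]')
    for hook_name, handler in hgrc_hooks.items():
        print(f'{hook_name} = {handler}')
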
@@ -0,0 +1,168 b''
1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import os
20 import textwrap
21 import string
22 import functools
23 import logging
24 import tempfile
25 import logging.config
26
27 from vcsserver.type_utils import str2bool, aslist
28
29 log = logging.getLogger(__name__)
30
31 # skip keys that are set here, so we don't double-process them
32 set_keys = {
33 '__file__': ''
34 }
35
36
37 class SettingsMaker:
38
39 def __init__(self, app_settings):
40 self.settings = app_settings
41
42 @classmethod
43 def _bool_func(cls, input_val):
44 if isinstance(input_val, bytes):
45 # decode to str
46 input_val = input_val.decode('utf8')
47 return str2bool(input_val)
48
49 @classmethod
50 def _int_func(cls, input_val):
51 return int(input_val)
52
53 @classmethod
54 def _list_func(cls, input_val, sep=','):
55 return aslist(input_val, sep=sep)
56
57 @classmethod
58 def _string_func(cls, input_val, lower=True):
59 if lower:
60 input_val = input_val.lower()
61 return input_val
62
63 @classmethod
64 def _float_func(cls, input_val):
65 return float(input_val)
66
67 @classmethod
68 def _dir_func(cls, input_val, ensure_dir=False, mode=0o755):
69
70 # ensure we have our dir created
71 if not os.path.isdir(input_val) and ensure_dir:
72 os.makedirs(input_val, mode=mode, exist_ok=True)
73
74 if not os.path.isdir(input_val):
75 raise Exception(f'Dir at {input_val} does not exist')
76 return input_val
77
78 @classmethod
79 def _file_path_func(cls, input_val, ensure_dir=False, mode=0o755):
80 dirname = os.path.dirname(input_val)
81 cls._dir_func(dirname, ensure_dir=ensure_dir)
82 return input_val
83
84 @classmethod
85 def _key_transformator(cls, key):
86 return "{}_{}".format('RC'.upper(), key.upper().replace('.', '_').replace('-', '_'))
87
88 def maybe_env_key(self, key):
89 # now maybe we have this KEY in env, search and use the value with higher priority.
90 transformed_key = self._key_transformator(key)
91 envvar_value = os.environ.get(transformed_key)
92 if envvar_value:
93 log.debug('using `%s` key instead of `%s` key for config', transformed_key, key)
94
95 return envvar_value
96
97 def env_expand(self):
98 replaced = {}
99 for k, v in self.settings.items():
100 if k not in set_keys:
101 envvar_value = self.maybe_env_key(k)
102 if envvar_value:
103 replaced[k] = envvar_value
104 set_keys[k] = envvar_value
105
106 # apply all the updated keys back into settings
107 self.settings.update(replaced)
108
109 def enable_logging(self, logging_conf=None, level='INFO', formatter='generic'):
110 """
111 Helper to enable logging configuration on a running instance
112 :return:
113 """
114
115 if not str2bool(self.settings.get('logging.autoconfigure')):
116 log.info('logging configuration based on main .ini file')
117 return
118
119 if logging_conf is None:
120 logging_conf = self.settings.get('logging.logging_conf_file') or ''
121
122 if not os.path.isfile(logging_conf):
123 log.error('Unable to set up logging based on %s: '
124 'file does not exist. Specify a path using the logging.logging_conf_file config setting.', logging_conf)
125 return
126
127 with open(logging_conf, 'rt') as f:
128 ini_template = textwrap.dedent(f.read())
129 ini_template = string.Template(ini_template).safe_substitute(
130 RC_LOGGING_LEVEL=os.environ.get('RC_LOGGING_LEVEL', '') or level,
131 RC_LOGGING_FORMATTER=os.environ.get('RC_LOGGING_FORMATTER', '') or formatter
132 )
133
134 with tempfile.NamedTemporaryFile(mode='w', prefix='rc_logging_', suffix='.ini', delete=False) as f:
135 log.info('Saved Temporary LOGGING config at %s', f.name)
136 f.write(ini_template)
137
138 logging.config.fileConfig(f.name)
139 os.remove(f.name)
140
141 def make_setting(self, key, default, lower=False, default_when_empty=False, parser=None):
142 input_val = self.settings.get(key, default)
143
144 if default_when_empty and not input_val:
145 # use default value when value is set in the config but it is empty
146 input_val = default
147
148 parser_func = {
149 'bool': self._bool_func,
150 'int': self._int_func,
151 'list': self._list_func,
152 'list:newline': functools.partial(self._list_func, sep='\n'),
153 'list:spacesep': functools.partial(self._list_func, sep=' '),
154 'string': functools.partial(self._string_func, lower=lower),
155 'dir': self._dir_func,
156 'dir:ensured': functools.partial(self._dir_func, ensure_dir=True),
157 'file': self._file_path_func,
158 'file:ensured': functools.partial(self._file_path_func, ensure_dir=True),
159 None: lambda i: i
160 }[parser]
161
162 envvar_value = self.maybe_env_key(key)
163 if envvar_value:
164 input_val = envvar_value
165 set_keys[key] = input_val
166
167 self.settings[key] = parser_func(input_val)
168 return self.settings[key]
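
A minimal usage sketch for SettingsMaker, assuming an ini-derived settings dict; the keys and values here are illustrative:

    settings = {
        'logging.autoconfigure': 'true',
        'cache_dir': '/tmp/rc_data',
    }
    maker = SettingsMaker(settings)
    maker.env_expand()  # pull any RC_* environment overrides in first

    # parse raw strings into typed values; an env var such as
    # RC_LOGGING_AUTOCONFIGURE=false would take priority over the ini value
    maker.make_setting('logging.autoconfigure', default=False, parser='bool')
    maker.make_setting('cache_dir', default='/tmp', parser='dir:ensured')
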
@@ -0,0 +1,243 b''
1 '''
2 This library is provided to allow standard python logging
3 to output log data as JSON formatted strings
4 '''
5 import logging
6 import json
7 import re
8 from datetime import date, datetime, time, tzinfo, timedelta
9 import traceback
10 import importlib
11
12 from inspect import istraceback
13
14 from collections import OrderedDict
15
16
17 def _inject_req_id(record, *args, **kwargs):
18 return record
19
20
21 ExceptionAwareFormatter = logging.Formatter
22
23
24 ZERO = timedelta(0)
25 HOUR = timedelta(hours=1)
26
27
28 class UTC(tzinfo):
29 """UTC"""
30
31 def utcoffset(self, dt):
32 return ZERO
33
34 def tzname(self, dt):
35 return "UTC"
36
37 def dst(self, dt):
38 return ZERO
39
40 utc = UTC()
41
42
43 # skip natural LogRecord attributes
44 # http://docs.python.org/library/logging.html#logrecord-attributes
45 RESERVED_ATTRS = (
46 'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename',
47 'funcName', 'levelname', 'levelno', 'lineno', 'module',
48 'msecs', 'message', 'msg', 'name', 'pathname', 'process',
49 'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName')
50
51
52 def merge_record_extra(record, target, reserved):
53 """
54 Merges extra attributes from LogRecord object into target dictionary
55
56 :param record: logging.LogRecord
57 :param target: dict to update
58 :param reserved: dict or list with reserved keys to skip
59 """
60 for key, value in record.__dict__.items():
61 # this also allows numeric (non-string) keys
62 if (key not in reserved
63 and not (hasattr(key, "startswith")
64 and key.startswith('_'))):
65 target[key] = value
66 return target
67
68
69 class JsonEncoder(json.JSONEncoder):
70 """
71 A custom encoder extending the default JSONEncoder
72 """
73
74 def default(self, obj):
75 if isinstance(obj, (date, datetime, time)):
76 return self.format_datetime_obj(obj)
77
78 elif istraceback(obj):
79 return ''.join(traceback.format_tb(obj)).strip()
80
81 elif type(obj) == Exception \
82 or isinstance(obj, Exception) \
83 or type(obj) == type:
84 return str(obj)
85
86 try:
87 return super().default(obj)
88
89 except TypeError:
90 try:
91 return str(obj)
92
93 except Exception:
94 return None
95
96 def format_datetime_obj(self, obj):
97 return obj.isoformat()
98
99
100 class JsonFormatter(ExceptionAwareFormatter):
101 """
102 A custom formatter to format logging records as json strings.
103 Extra values will be formatted as str() if not supported by
104 json default encoder
105 """
106
107 def __init__(self, *args, **kwargs):
108 """
109 :param json_default: a function for encoding non-standard objects
110 as outlined in http://docs.python.org/2/library/json.html
111 :param json_encoder: optional custom encoder
112 :param json_serializer: a :meth:`json.dumps`-compatible callable
113 that will be used to serialize the log record.
114 :param json_indent: an optional :meth:`json.dumps`-compatible numeric value
115 that will be used to customize the indent of the output json.
116 :param prefix: an optional string prefix added at the beginning of
117 the formatted string
118 :param json_indent: indent parameter for json.dumps
119 :param json_ensure_ascii: ensure_ascii parameter for json.dumps
120 :param reserved_attrs: an optional list of fields that will be skipped when
121 outputting json log record. Defaults to all log record attributes:
122 http://docs.python.org/library/logging.html#logrecord-attributes
123 :param timestamp: an optional string/boolean field to add a timestamp when
124 outputting the json log record. If string is passed, timestamp will be added
125 to log record using string as key. If True boolean is passed, timestamp key
126 will be "timestamp". Defaults to False/off.
127 """
128 self.json_default = self._str_to_fn(kwargs.pop("json_default", None))
129 self.json_encoder = self._str_to_fn(kwargs.pop("json_encoder", None))
130 self.json_serializer = self._str_to_fn(kwargs.pop("json_serializer", json.dumps))
131 self.json_indent = kwargs.pop("json_indent", None)
132 self.json_ensure_ascii = kwargs.pop("json_ensure_ascii", True)
133 self.prefix = kwargs.pop("prefix", "")
134 reserved_attrs = kwargs.pop("reserved_attrs", RESERVED_ATTRS)
135 self.reserved_attrs = dict(list(zip(reserved_attrs, reserved_attrs)))
136 self.timestamp = kwargs.pop("timestamp", True)
137
138 # super(JsonFormatter, self).__init__(*args, **kwargs)
139 logging.Formatter.__init__(self, *args, **kwargs)
140 if not self.json_encoder and not self.json_default:
141 self.json_encoder = JsonEncoder
142
143 self._required_fields = self.parse()
144 self._skip_fields = dict(list(zip(self._required_fields,
145 self._required_fields)))
146 self._skip_fields.update(self.reserved_attrs)
147
148 def _str_to_fn(self, fn_as_str):
149 """
150 If the argument is not a string, return whatever was passed in.
151 Parses a string such as package.module.function, imports the module
152 and returns the function.
153
154 :param fn_as_str: The string to parse. If not a string, return it.
155 """
156 if not isinstance(fn_as_str, str):
157 return fn_as_str
158
159 path, _, function = fn_as_str.rpartition('.')
160 module = importlib.import_module(path)
161 return getattr(module, function)
162
163 def parse(self):
164 """
165 Parses format string looking for substitutions
166
167 This method is responsible for returning a list of fields (as strings)
168 to include in all log messages.
169 """
170 standard_formatters = re.compile(r'\((.+?)\)', re.IGNORECASE)
171 return standard_formatters.findall(self._fmt)
172
173 def add_fields(self, log_record, record, message_dict):
174 """
175 Override this method to implement custom logic for adding fields.
176 """
177 for field in self._required_fields:
178 log_record[field] = record.__dict__.get(field)
179 log_record.update(message_dict)
180 merge_record_extra(record, log_record, reserved=self._skip_fields)
181
182 if self.timestamp:
183 key = self.timestamp if type(self.timestamp) == str else 'timestamp'
184 log_record[key] = datetime.fromtimestamp(record.created, tz=utc)
185
186 def process_log_record(self, log_record):
187 """
188 Override this method to implement custom logic
189 on the possibly ordered dictionary.
190 """
191 return log_record
192
193 def jsonify_log_record(self, log_record):
194 """Returns a json string of the log record."""
195 return self.json_serializer(log_record,
196 default=self.json_default,
197 cls=self.json_encoder,
198 indent=self.json_indent,
199 ensure_ascii=self.json_ensure_ascii)
200
201 def serialize_log_record(self, log_record):
202 """Returns the final representation of the log record."""
203 return "{}{}".format(self.prefix, self.jsonify_log_record(log_record))
204
205 def format(self, record):
206 """Formats a log record and serializes to json"""
207 message_dict = {}
208 # FIXME: logging.LogRecord.msg and logging.LogRecord.message in typeshed
209 # are always type of str. We shouldn't need to override that.
210 if isinstance(record.msg, dict):
211 message_dict = record.msg
212 record.message = None
213 else:
214 record.message = record.getMessage()
215 # only format time if needed
216 if "asctime" in self._required_fields:
217 record.asctime = self.formatTime(record, self.datefmt)
218
219 # Display formatted exception, but allow overriding it in the
220 # user-supplied dict.
221 if record.exc_info and not message_dict.get('exc_info'):
222 message_dict['exc_info'] = self.formatException(record.exc_info)
223 if not message_dict.get('exc_info') and record.exc_text:
224 message_dict['exc_info'] = record.exc_text
225 # Display formatted record of stack frames
226 # default format is a string returned from :func:`traceback.print_stack`
227 try:
228 if record.stack_info and not message_dict.get('stack_info'):
229 message_dict['stack_info'] = self.formatStack(record.stack_info)
230 except AttributeError:
231 # Python2.7 doesn't have stack_info.
232 pass
233
234 try:
235 log_record = OrderedDict()
236 except NameError:
237 log_record = {}
238
239 _inject_req_id(record, with_prefix=False)
240 self.add_fields(log_record, record, message_dict)
241 log_record = self.process_log_record(log_record)
242
243 return self.serialize_log_record(log_record)
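
A minimal sketch of using this vendored formatter directly from Python, complementing the ini-based wiring shown elsewhere in this commit; the logger name is illustrative:

    import logging

    handler = logging.StreamHandler()
    # fields named in the format string become keys of the JSON record
    handler.setFormatter(JsonFormatter('%(timestamp)s %(levelname)s %(name)s %(message)s'))

    log = logging.getLogger('vcsserver.example')
    log.addHandler(handler)
    log.warning('cache miss', extra={'req_id': 'abc123'})  # extras are merged in
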
@@ -0,0 +1,53 b''
1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import sys
20 import logging
21
22
23 BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = list(range(30, 38))
24
25 # Sequences
26 RESET_SEQ = "\033[0m"
27 COLOR_SEQ = "\033[0;%dm"
28 BOLD_SEQ = "\033[1m"
29
30 COLORS = {
31 'CRITICAL': MAGENTA,
32 'ERROR': RED,
33 'WARNING': CYAN,
34 'INFO': GREEN,
35 'DEBUG': BLUE,
36 'SQL': YELLOW
37 }
38
39
40 class ColorFormatter(logging.Formatter):
41
42 def format(self, record):
43 """
44 Wrap the formatted record in ANSI color codes based on its levelname
45 """
46 def_record = super().format(record)
47
48 levelname = record.levelname
49 start = COLOR_SEQ % (COLORS[levelname])
50 end = RESET_SEQ
51
52 colored_record = ''.join([start, def_record, end])
53 return colored_record
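
Usage follows the same pattern as any logging.Formatter; a brief sketch:

    import logging

    handler = logging.StreamHandler()
    handler.setFormatter(ColorFormatter('%(levelname)-5.5s [%(name)s] %(message)s'))
    logging.getLogger('vcsserver').addHandler(handler)
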
@@ -0,0 +1,87 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 import logging
19 import os
20 import diskcache
21 from diskcache import RLock
22
23 log = logging.getLogger(__name__)
24
25 cache_meta = None
26
27
28 class ReentrantLock(RLock):
29 def __enter__(self):
30 reentrant_lock_key = self._key
31
32 log.debug('Acquire ReentrantLock(key=%s) for archive cache generation...', reentrant_lock_key)
33 #self.acquire()
34 log.debug('Lock for key=%s acquired', reentrant_lock_key)
35
36 def __exit__(self, *exc_info):
37 #self.release()
38 pass
39
40
41 def get_archival_config(config):
42
43 final_config = {
44 'archive_cache.eviction_policy': 'least-frequently-used'
45 }
46
47 for k, v in config.items():
48 if k.startswith('archive_cache'):
49 final_config[k] = v
50
51 return final_config
52
53
54 def get_archival_cache_store(config):
55
56 global cache_meta
57 if cache_meta is not None:
58 return cache_meta
59
60 config = get_archival_config(config)
61
62 archive_cache_dir = config['archive_cache.store_dir']
63 archive_cache_size_gb = config['archive_cache.cache_size_gb']
64 archive_cache_shards = config['archive_cache.cache_shards']
65 archive_cache_eviction_policy = config['archive_cache.eviction_policy']
66
67 log.debug('Initializing archival cache instance under %s', archive_cache_dir)
68
69 # check if it's ok to write, and re-create the archive cache
70 if not os.path.isdir(archive_cache_dir):
71 os.makedirs(archive_cache_dir, exist_ok=True)
72
73 d_cache = diskcache.FanoutCache(
74 archive_cache_dir, shards=archive_cache_shards,
75 cull_limit=0, # manual eviction required
76 size_limit=archive_cache_size_gb * 1024 * 1024 * 1024,
77 eviction_policy=archive_cache_eviction_policy,
78 timeout=30
79 )
80 cache_meta = d_cache
81 return cache_meta
82
83
84 def includeme(config):
85 # init our cache at start; for vcsserver we don't init at runtime
86 # because our cache config is sent over the wire on each make-archive call, which lazy-enables the client
87 return
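
A sketch of driving this module; the archive_cache.* keys mirror the ones read in get_archival_cache_store() above, with example values and a hypothetical cache key:

    config = {
        'archive_cache.store_dir': '/var/cache/rc_archive_cache',  # example path
        'archive_cache.cache_size_gb': 10,
        'archive_cache.cache_shards': 8,
    }
    d_cache = get_archival_cache_store(config)

    key = 'archive-abcdef123456'  # hypothetical cache key
    if key not in d_cache:
        d_cache.set(key, b'...archive bytes...')
    archive_data = d_cache.get(key)
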
@@ -0,0 +1,2 b''
1 # use orjson by default
2 import orjson as json
@@ -0,0 +1,70 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 from vcsserver.lib._vendor.statsd import client_from_config
19
20
21 class StatsdClientNotInitialised(Exception):
22 pass
23
24
25 class _Singleton(type):
26 """A metaclass that creates a Singleton base class when called."""
27
28 _instances = {}
29
30 def __call__(cls, *args, **kwargs):
31 if cls not in cls._instances:
32 cls._instances[cls] = super().__call__(*args, **kwargs)
33 return cls._instances[cls]
34
35
36 class Singleton(_Singleton("SingletonMeta", (object,), {})):
37 pass
38
39
40 class StatsdClientClass(Singleton):
41 setup_run = False
42 statsd_client = None
43 statsd = None
44 strict_mode_init = False
45
46 def __getattribute__(self, name):
47
48 if name.startswith("statsd"):
49 if self.setup_run:
50 return super().__getattribute__(name)
51 else:
52 if self.strict_mode_init:
53 raise StatsdClientNotInitialised(f"requested key was {name}")
54 return None
55
56 return super().__getattribute__(name)
57
58 def setup(self, settings):
59 """
60 Initialize the client
61 """
62 self.strict_mode_init = settings.pop('statsd_strict_init', False)
63
64 statsd = client_from_config(settings)
65 self.statsd = statsd
66 self.statsd_client = statsd
67 self.setup_run = True
68
69
70 StatsdClient = StatsdClientClass()
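
A usage sketch, assuming client_from_config understands the statsd.* keys used in the ini files of this commit; values are illustrative:

    # initialize once at application startup
    StatsdClient.setup({
        'statsd.enabled': 'true',
        'statsd.statsd_host': '127.0.0.1',
        'statsd.statsd_port': '8125',
    })

    # anywhere else: attribute access returns None until setup() has run
    # (or raises StatsdClientNotInitialised when strict mode is enabled)
    if StatsdClient.statsd:
        StatsdClient.statsd.incr('vcsserver_requests')  # assuming an incr() method
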
@@ -0,0 +1,160 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 import os
19 import tempfile
20
21 from svn import client
22 from svn import core
23 from svn import ra
24
25 from mercurial import error
26
27 from vcsserver.str_utils import safe_bytes
28
29 core.svn_config_ensure(None)
30 svn_config = core.svn_config_get_config(None)
31
32
33 class RaCallbacks(ra.Callbacks):
34 @staticmethod
35 def open_tmp_file(pool): # pragma: no cover
36 (fd, fn) = tempfile.mkstemp()
37 os.close(fd)
38 return fn
39
40 @staticmethod
41 def get_client_string(pool):
42 return b'RhodeCode-subversion-url-checker'
43
44
45 class SubversionException(Exception):
46 pass
47
48
49 class SubversionConnectionException(SubversionException):
50 """Exception raised when a generic error occurs when connecting to a repository."""
51
52
53 def normalize_url(url):
54 if not url:
55 return url
56 if url.startswith(b'svn+http://') or url.startswith(b'svn+https://'):
57 url = url[4:]
58 url = url.rstrip(b'/')
59 return url
60
61
62 def _create_auth_baton(pool):
63 """Create a Subversion authentication baton. """
64 # Give the client context baton a suite of authentication
65 # providers.
66 platform_specific = [
67 'svn_auth_get_gnome_keyring_simple_provider',
68 'svn_auth_get_gnome_keyring_ssl_client_cert_pw_provider',
69 'svn_auth_get_keychain_simple_provider',
70 'svn_auth_get_keychain_ssl_client_cert_pw_provider',
71 'svn_auth_get_kwallet_simple_provider',
72 'svn_auth_get_kwallet_ssl_client_cert_pw_provider',
73 'svn_auth_get_ssl_client_cert_file_provider',
74 'svn_auth_get_windows_simple_provider',
75 'svn_auth_get_windows_ssl_server_trust_provider',
76 ]
77
78 providers = []
79
80 for p in platform_specific:
81 if getattr(core, p, None) is not None:
82 try:
83 providers.append(getattr(core, p)())
84 except RuntimeError:
85 pass
86
87 providers += [
88 client.get_simple_provider(),
89 client.get_username_provider(),
90 client.get_ssl_client_cert_file_provider(),
91 client.get_ssl_client_cert_pw_file_provider(),
92 client.get_ssl_server_trust_file_provider(),
93 ]
94
95 return core.svn_auth_open(providers, pool)
96
97
98 class SubversionRepo:
99 """Wrapper for a Subversion repository.
100
101 It uses the SWIG Python bindings, see above for requirements.
102 """
103 def __init__(self, svn_url: bytes = b'', username: bytes = b'', password: bytes = b''):
104
105 self.username = username
106 self.password = password
107 self.svn_url = core.svn_path_canonicalize(svn_url)
108
109 self.auth_baton_pool = core.Pool()
110 self.auth_baton = _create_auth_baton(self.auth_baton_pool)
111 # self.init_ra_and_client() assumes that a pool already exists
112 self.pool = core.Pool()
113
114 self.ra = self.init_ra_and_client()
115 self.uuid = ra.get_uuid(self.ra, self.pool)
116
117 def init_ra_and_client(self):
118 """Initializes the RA and client layers, because sometimes getting
119 unified diffs runs the remote server out of open files.
120 """
121
122 if self.username:
123 core.svn_auth_set_parameter(self.auth_baton,
124 core.SVN_AUTH_PARAM_DEFAULT_USERNAME,
125 self.username)
126 if self.password:
127 core.svn_auth_set_parameter(self.auth_baton,
128 core.SVN_AUTH_PARAM_DEFAULT_PASSWORD,
129 self.password)
130
131 callbacks = RaCallbacks()
132 callbacks.auth_baton = self.auth_baton
133
134 try:
135 return ra.open2(self.svn_url, callbacks, svn_config, self.pool)
136 except SubversionException as e:
137 # e.child contains a detailed error message
138 msglist = []
139 svn_exc = e
140 while svn_exc:
141 if svn_exc.args[0]:
142 msglist.append(svn_exc.args[0])
143 svn_exc = svn_exc.child
144 msg = '\n'.join(msglist)
145 raise SubversionConnectionException(msg)
146
147
148 class svnremoterepo:
149 """ the dumb wrapper for actual Subversion repositories """
150
151 def __init__(self, username: bytes = b'', password: bytes = b'', svn_url: bytes = b''):
152 self.username = username or b''
153 self.password = password or b''
154 self.path = normalize_url(svn_url)
155
156 def svn(self):
157 try:
158 return SubversionRepo(self.path, self.username, self.password)
159 except SubversionConnectionException as e:
160 raise error.Abort(safe_bytes(e))
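
A hypothetical usage of the wrappers above; URL and credentials are illustrative:

    repo = svnremoterepo(
        username=b'reader',
        password=b'secret',
        svn_url=b'svn+https://svn.example.com/repo',  # svn+ prefix is stripped by normalize_url
    )
    svn_repo = repo.svn()   # raises mercurial error.Abort if the connection fails
    print(svn_repo.uuid)    # repository UUID fetched via ra.get_uuid
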
@@ -0,0 +1,17 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
1 NO CONTENT: new file 100644
1 NO CONTENT: new file 100644
1 NO CONTENT: new file 100644
@@ -1,6 +1,5 b''
1 1 [bumpversion]
2 current_version = 4.27.1
2 current_version = 5.0.0
3 3 message = release: Bump version {current_version} to {new_version}
4 4
5 5 [bumpversion:file:vcsserver/VERSION]
6
@@ -1,37 +1,41 b''
1 1 syntax: glob
2
2 3 *.orig
3 4 *.pyc
4 5 *.swp
5 6 *.sqlite
6 7 *.tox
7 8 *.egg-info
8 9 *.egg
9 10 *.eggs
10 11 *.idea
11 12 .DS_Store*
12 13
13 14
14 15 syntax: regexp
15 16
16 17 #.filename
17 18 ^\.settings$
18 19 ^\.project$
19 20 ^\.pydevproject$
20 21 ^\.coverage$
21 22 ^\.cache.*$
23 ^\.venv.*$
24 ^\.ruff_cache.*$
22 25 ^\.rhodecode$
23 26
27
24 28 ^.dev
25 29 ^build/
26 30 ^coverage\.xml$
27 31 ^data$
28 32 ^dev.ini$
29 33 ^acceptance_tests/dev.*\.ini$
30 34 ^dist/
31 35 ^fabfile.py
32 36 ^htmlcov
33 37 ^junit\.xml$
34 38 ^node_modules/
35 39 ^pylint.log$
36 40 ^build$
37 41 ^result$
@@ -1,16 +1,23 b''
1 1 # top level files
2 2 include *.rst
3 3 include *.txt
4 4
5 5 # package extras
6 6 include vcsserver/VERSION
7 7
8 # all python files inside vcsserver
9 graft vcsserver
10
8 11 # all config files
9 12 recursive-include configs *
10 13
11 14 # hook templates
12 15 recursive-include vcsserver/hook_utils/hook_templates *
13 16
14 17 # skip any tests files
15 18 recursive-exclude vcsserver/tests *
16 19
20 recursive-exclude docs/_build *
21 recursive-exclude * __pycache__
22 recursive-exclude * *.py[co]
23 recursive-exclude * .*.sw[a-z]
@@ -1,45 +1,139 b''
1 .DEFAULT_GOAL := help
1 # required for pushd to work
2 SHELL = /bin/bash
3
2 4
3 5 # set by: PATH_TO_OUTDATED_PACKAGES=/some/path/outdated_packages.py
4 6 OUTDATED_PACKAGES = ${PATH_TO_OUTDATED_PACKAGES}
5 7
6 8 .PHONY: clean
7 clean: ## full clean
9 ## Cleanup compiled and cache py files
10 clean:
8 11 make test-clean
9 12 find . -type f \( -iname '*.c' -o -iname '*.pyc' -o -iname '*.so' -o -iname '*.orig' \) -exec rm '{}' ';'
13 find . -type d -name "build" -prune -exec rm -rf '{}' ';'
10 14
11 15
12 16 .PHONY: test
13 test: ## run test-clean and tests
17 ## run test-clean and tests
18 test:
14 19 make test-clean
15 20 make test-only
16 21
17 22
18 .PHONY:test-clean
19 test-clean: ## run test-clean and tests
23 .PHONY: test-clean
24 ## Cleanup test run leftovers (coverage, junit, caches)
25 test-clean:
20 26 rm -rf coverage.xml htmlcov junit.xml pylint.log result
21 27 find . -type d -name "__pycache__" -prune -exec rm -rf '{}' ';'
22 28 find . -type f \( -iname '.coverage.*' \) -exec rm '{}' ';'
23 29
24 30
25 31 .PHONY: test-only
26 test-only: ## run tests
32 ## Run tests only without cleanup
33 test-only:
27 34 PYTHONHASHSEED=random \
28 35 py.test -x -vv -r xw -p no:sugar \
29 --cov=vcsserver --cov-report=term-missing --cov-report=html \
30 vcsserver
36 --cov-report=term-missing --cov-report=html \
37 --cov=vcsserver vcsserver
31 38
32 39
33 .PHONY: generate-pkgs
34 generate-pkgs: ## generate new python packages
35 nix-shell pkgs/shell-generate.nix --command "pip2nix generate --licenses"
40 .PHONY: ruff-check
41 ## run a ruff analysis
42 ruff-check:
43 ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
36 44
37 45
38 46 .PHONY: pip-packages
39 pip-packages: ## show outdated packages
47 ## Show outdated packages
48 pip-packages:
40 49 python ${OUTDATED_PACKAGES}
41 50
42 51
43 .PHONY: help
44 help:
45 @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-24s\033[0m %s\n", $$1, $$2}'
52 .PHONY: build
53 ## Build sdist/egg
54 build:
55 python -m build
56
57
58 .PHONY: dev-sh
59 ## make dev-sh
60 dev-sh:
61 echo "deb [trusted=yes] https://apt.fury.io/rsteube/ /" | sudo tee -a "/etc/apt/sources.list.d/fury.list"
62 sudo apt-get update
63 sudo apt-get install -y zsh carapace-bin
64 rm -rf /home/rhodecode/.oh-my-zsh
65 curl https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh | sh
66 echo "source <(carapace _carapace)" > /home/rhodecode/.zsrc
67 PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
68
69
70 .PHONY: dev-env
71 ## make dev-env based on the requirements files and install develop of packages
72 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
73 dev-env:
74 pip install build virtualenv
75 pip wheel --wheel-dir=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
76 pip install --no-index --find-links=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
77 pip install -e .
78
79
80 .PHONY: sh
81 ## shortcut for make dev-sh dev-env
82 sh:
83 make dev-env
84 make dev-sh
85
86
87 .PHONY: dev-srv
88 ## run develop server instance, docker exec -it $(docker ps -q --filter 'name=dev-enterprise-ce') /bin/bash
89 dev-srv:
90 pserve --reload .dev/dev.ini
91
92
93 .PHONY: dev-srv-g
94 ## run gunicorn multi process workers
95 dev-srv-g:
96 gunicorn --workers=4 --paste .dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py
97
98
99 # Default command on calling make
100 .DEFAULT_GOAL := show-help
101
102 .PHONY: show-help
103 show-help:
104 @echo "$$(tput bold)Available rules:$$(tput sgr0)"
105 @echo
106 @sed -n -e "/^## / { \
107 h; \
108 s/.*//; \
109 :doc" \
110 -e "H; \
111 n; \
112 s/^## //; \
113 t doc" \
114 -e "s/:.*//; \
115 G; \
116 s/\\n## /---/; \
117 s/\\n/ /g; \
118 p; \
119 }" ${MAKEFILE_LIST} \
120 | LC_ALL='C' sort --ignore-case \
121 | awk -F '---' \
122 -v ncol=$$(tput cols) \
123 -v indent=19 \
124 -v col_on="$$(tput setaf 6)" \
125 -v col_off="$$(tput sgr0)" \
126 '{ \
127 printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
128 n = split($$2, words, " "); \
129 line_length = ncol - indent; \
130 for (i = 1; i <= n; i++) { \
131 line_length -= length(words[i]) + 1; \
132 if (line_length <= 0) { \
133 line_length = ncol - indent - length(words[i]) - 1; \
134 printf "\n%*s ", -indent, " "; \
135 } \
136 printf "%s ", words[i]; \
137 } \
138 printf "\n"; \
139 }'
@@ -1,246 +1,204 b''
1 ## -*- coding: utf-8 -*-
1 #
2 2
3 3 ; #################################
4 4 ; RHODECODE VCSSERVER CONFIGURATION
5 5 ; #################################
6 6
7 7 [server:main]
8 8 ; COMMON HOST/IP CONFIG
9 9 host = 0.0.0.0
10 port = 9900
10 port = 10010
11 11
12 12 ; ##################################################
13 13 ; WAITRESS WSGI SERVER - Recommended for Development
14 14 ; ##################################################
15 15
16 16 ; use server type
17 17 use = egg:waitress#main
18 18
19 19 ; number of worker threads
20 20 threads = 5
21 21
22 22 ; MAX BODY SIZE 100GB
23 23 max_request_body_size = 107374182400
24 24
25 25 ; Use poll instead of select, fixes file descriptors limits problems.
26 26 ; May not work on old windows systems.
27 27 asyncore_use_poll = true
28 28
29 29
30 30 ; ###########################
31 31 ; GUNICORN APPLICATION SERVER
32 32 ; ###########################
33 33
34 ; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini
34 ; run with gunicorn --paste rhodecode.ini
35 35
36 36 ; Module to use, this setting shouldn't be changed
37 37 #use = egg:gunicorn#main
38 38
39 ; Sets the number of process workers. More workers means more concurrent connections
40 ; RhodeCode can handle at the same time. Each additional worker also it increases
41 ; memory usage as each has it's own set of caches.
42 ; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
43 ; than 8-10 unless for really big deployments .e.g 700-1000 users.
44 ; `instance_id = *` must be set in the [app:main] section below (which is the default)
45 ; when using more than 1 worker.
46 #workers = 2
47
48 ; Gunicorn access log level
49 #loglevel = info
50
51 ; Process name visible in process list
52 #proc_name = rhodecode_vcsserver
53
54 ; Type of worker class, one of `sync`, `gevent`
55 ; currently `sync` is the only option allowed.
56 #worker_class = sync
57
58 ; The maximum number of simultaneous clients. Valid only for gevent
59 #worker_connections = 10
60
61 ; Max number of requests that worker will handle before being gracefully restarted.
62 ; Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
63 #max_requests = 1000
64 #max_requests_jitter = 30
65
66 ; Amount of time a worker can spend with handling a request before it
67 ; gets killed and restarted. By default set to 21600 (6hrs)
68 ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
69 #timeout = 21600
70
71 ; The maximum size of HTTP request line in bytes.
72 ; 0 for unlimited
73 #limit_request_line = 0
74
75 ; Limit the number of HTTP headers fields in a request.
76 ; By default this value is 100 and can't be larger than 32768.
77 #limit_request_fields = 32768
78
79 ; Limit the allowed size of an HTTP request header field.
80 ; Value is a positive number or 0.
81 ; Setting it to 0 will allow unlimited header field sizes.
82 #limit_request_field_size = 0
83
84 ; Timeout for graceful workers restart.
85 ; After receiving a restart signal, workers have this much time to finish
86 ; serving requests. Workers still alive after the timeout (starting from the
87 ; receipt of the restart signal) are force killed.
88 ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
89 #graceful_timeout = 3600
90
91 # The number of seconds to wait for requests on a Keep-Alive connection.
92 # Generally set in the 1-5 seconds range.
93 #keepalive = 2
94
95 ; Maximum memory usage that each worker can use before it will receive a
96 ; graceful restart signal 0 = memory monitoring is disabled
97 ; Examples: 268435456 (256MB), 536870912 (512MB)
98 ; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
99 #memory_max_usage = 0
100
101 ; How often in seconds to check for memory usage for each gunicorn worker
102 #memory_usage_check_interval = 60
103
104 ; Threshold value for which we don't recycle worker if GarbageCollection
105 ; frees up enough resources. Before each restart we try to run GC on worker
106 ; in case we get enough free memory after that, restart will not happen.
107 #memory_usage_recovery_threshold = 0.8
108
109
110 39 [app:main]
111 40 ; The %(here)s variable will be replaced with the absolute path of parent directory
112 41 ; of this file
42 ; Each option in the app:main section can be overridden by an environment variable
43 ;
44 ;To override an option:
45 ;
46 ;RC_<KeyName>
47 ;Everything should be uppercase, . and - should be replaced by _.
48 ;For example, if you have these configuration settings:
49 ;rc_cache.repo_object.backend = foo
50 ;can be overridden by
51 ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
52
113 53 use = egg:rhodecode-vcsserver
114 54
115 55
116 56 ; #############
117 57 ; DEBUG OPTIONS
118 58 ; #############
119 59
120 60 # During development we want to have the debug toolbar enabled
121 61 pyramid.includes =
122 62 pyramid_debugtoolbar
123 63
124 64 debugtoolbar.hosts = 0.0.0.0/0
125 65 debugtoolbar.exclude_prefixes =
126 66 /css
127 67 /fonts
128 68 /images
129 69 /js
130 70
131 71 ; #################
132 72 ; END DEBUG OPTIONS
133 73 ; #################
134 74
135 75 ; Pyramid default locales, we need this to be set
136 pyramid.default_locale_name = en
76 #pyramid.default_locale_name = en
137 77
138 78 ; default locale used by VCS systems
139 locale = en_US.UTF-8
79 #locale = en_US.UTF-8
140 80
141 81 ; path to binaries for vcsserver, it should be set by the installer
142 ; at installation time, e.g /home/user/vcsserver-1/profile/bin
82 ; at installation time, e.g. /home/user/.rccontrol/vcsserver-1/profile/bin
143 83 ; it can also be a path to nix-build output in case of development
144 84 core.binary_dir = ""
145 85
146 86 ; Custom exception store path, defaults to TMPDIR
147 87 ; This is used to store exception from RhodeCode in shared directory
148 88 #exception_tracker.store_path =
149 89
150 90 ; #############
151 91 ; DOGPILE CACHE
152 92 ; #############
153 93
154 94 ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
155 95 ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
156 cache_dir = %(here)s/data
96 #cache_dir = %(here)s/data
157 97
158 98 ; ***************************************
159 99 ; `repo_object` cache, default file based
160 100 ; ***************************************
161 101
162 102 ; `repo_object` cache settings for vcs methods for repositories
163 rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
103 #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
164 104
165 105 ; cache auto-expires after N seconds
166 106 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
167 rc_cache.repo_object.expiration_time = 2592000
107 #rc_cache.repo_object.expiration_time = 2592000
168 108
169 109 ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
170 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache.db
110 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
171 111
172 112 ; ***********************************************************
173 113 ; `repo_object` cache with redis backend
174 114 ; recommended for larger instance, and for better performance
175 115 ; ***********************************************************
176 116
177 117 ; `repo_object` cache settings for vcs methods for repositories
178 118 #rc_cache.repo_object.backend = dogpile.cache.rc.redis_msgpack
179 119
180 120 ; cache auto-expires after N seconds
181 121 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
182 122 #rc_cache.repo_object.expiration_time = 2592000
183 123
184 124 ; redis_expiration_time needs to be greater then expiration_time
185 125 #rc_cache.repo_object.arguments.redis_expiration_time = 3592000
186 126
187 127 #rc_cache.repo_object.arguments.host = localhost
188 128 #rc_cache.repo_object.arguments.port = 6379
189 129 #rc_cache.repo_object.arguments.db = 5
190 130 #rc_cache.repo_object.arguments.socket_timeout = 30
191 131 ; more Redis options: https://dogpilecache.sqlalchemy.org/en/latest/api.html#redis-backends
192 132 #rc_cache.repo_object.arguments.distributed_lock = true
193 133
194 134 ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
195 135 #rc_cache.repo_object.arguments.lock_auto_renewal = true
196 136
197 ; Statsd client config
137 ; Statsd client config, this is used to send metrics to statsd
138 ; We recommend setting up statsd_exporter and scraping metrics using Prometheus
198 139 #statsd.enabled = false
199 140 #statsd.statsd_host = 0.0.0.0
200 141 #statsd.statsd_port = 8125
201 142 #statsd.statsd_prefix =
202 143 #statsd.statsd_ipv6 = false
203 144
145 ; Configure logging automatically at server startup. Set to false
146 ; to use the custom logging config below.
147 ; RC_LOGGING_FORMATTER
148 ; RC_LOGGING_LEVEL
149 ; env variables can control the logging settings when autoconfigure is enabled
150
151 #logging.autoconfigure = true
152
153 ; specify your own custom logging config file to configure logging
154 #logging.logging_conf_file = /path/to/custom_logging.ini
155
204 156 ; #####################
205 157 ; LOGGING CONFIGURATION
206 158 ; #####################
159
207 160 [loggers]
208 161 keys = root, vcsserver
209 162
210 163 [handlers]
211 164 keys = console
212 165
213 166 [formatters]
214 keys = generic
167 keys = generic, json
215 168
216 169 ; #######
217 170 ; LOGGERS
218 171 ; #######
219 172 [logger_root]
220 173 level = NOTSET
221 174 handlers = console
222 175
223 176 [logger_vcsserver]
224 177 level = DEBUG
225 178 handlers =
226 179 qualname = vcsserver
227 180 propagate = 1
228 181
229
230 182 ; ########
231 183 ; HANDLERS
232 184 ; ########
233 185
234 186 [handler_console]
235 187 class = StreamHandler
236 188 args = (sys.stderr, )
237 189 level = DEBUG
190 ; To enable JSON formatted logs replace 'generic' with 'json'
191 ; This allows sending properly formatted logs to grafana loki or elasticsearch
238 192 formatter = generic
239 193
240 194 ; ##########
241 195 ; FORMATTERS
242 196 ; ##########
243 197
244 198 [formatter_generic]
245 199 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
246 200 datefmt = %Y-%m-%d %H:%M:%S
201
202 [formatter_json]
203 format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
204 class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
@@ -1,265 +1,520 b''
1 1 """
2 2 Gunicorn config extension and hooks. This config file adds some extra settings and memory management.
3 3 Gunicorn configuration should be managed by .ini files entries of RhodeCode or VCSServer
4 4 """
5 5
6 6 import gc
7 7 import os
8 8 import sys
9 9 import math
10 10 import time
11 11 import threading
12 12 import traceback
13 13 import random
14 import socket
15 import dataclasses
14 16 from gunicorn.glogging import Logger
15 17
16 18
17 19 def get_workers():
18 20 import multiprocessing
19 21 return multiprocessing.cpu_count() * 2 + 1
20 22
21 # GLOBAL
23
24 bind = "127.0.0.1:10010"
25
26
27 # Error logging output for gunicorn (-) is stdout
22 28 errorlog = '-'
29
30 # Access logging output for gunicorn (-) is stdout
23 31 accesslog = '-'
24 32
25 33
26 34 # SERVER MECHANICS
27 35 # None == system temp dir
28 36 # worker_tmp_dir is recommended to be set to some tmpfs
29 37 worker_tmp_dir = None
30 38 tmp_upload_dir = None
31 39
40 # use re-use port logic
41 #reuse_port = True
42
32 43 # Custom log format
44 #access_log_format = (
45 # '%(t)s %(p)s INFO [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
46
47 # loki format for easier parsing in grafana
33 48 access_log_format = (
34 '%(t)s %(p)s INFO [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
49 'time="%(t)s" pid=%(p)s level="INFO" type="[GNCRN]" ip="%(h)-15s" rqt="%(L)s" response_code="%(s)s" response_bytes="%(b)-6s" uri="%(m)s:%(U)s %(q)s" user=":%(u)s" user_agent="%(a)s"')
50
51 # self-adjust workers based on CPU count, to use the available CPUs without exceeding resource quotas
52 # workers = get_workers()
53
54 # Gunicorn access log level
55 loglevel = 'info'
56
57 # Process name visible in a process list
58 proc_name = "rhodecode_vcsserver"
59
60 # Type of worker class, one of `sync`, `gevent` or `gthread`
61 # currently `sync` is the only option allowed for vcsserver; for rhodecode all 3 are allowed
62 # gevent:
63 # In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
64 # e.g. workers =3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
65 # gthread:
66 # In this case, the maximum number of concurrent requests is (N workers * X threads)
67 # e.g. workers = 3 threads=3 = 3*3, 9 concurrent requests can be handled
68 worker_class = 'sync'
69
70 # Sets the number of process workers. More workers means more concurrent connections
71 # RhodeCode can handle at the same time. Each additional worker also increases
72 # memory usage as each has its own set of caches.
73 # The recommended value is (2 * NUMBER_OF_CPUS + 1), e.g. 2 CPUs = 5 workers, but no more
74 # than 8-10 unless for huge deployments, e.g. 700-1000 users.
75 # `instance_id = *` must be set in the [app:main] section below (which is the default)
76 # when using more than 1 worker.
77 workers = 2
78
79 # Threads numbers for worker class gthread
80 threads = 1
81
82 # The maximum number of simultaneous clients. Valid only for gevent
83 # In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
84 # e.g. workers=3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
85 worker_connections = 10
86
87 # Max number of requests that worker will handle before being gracefully restarted.
88 # Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
89 max_requests = 2000
90 max_requests_jitter = int(max_requests * 0.2) # 20% of max_requests
91
92 # The maximum number of pending connections.
93 # Exceeding this number results in the client getting an error when attempting to connect.
94 backlog = 64
35 95
36 # self adjust workers based on CPU count
37 # workers = get_workers()
96 # The amount of time a worker can spend handling a request before it
97 # gets killed and restarted. By default, set to 21600 (6hrs)
98 # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
99 timeout = 21600
100
101 # The maximum size of HTTP request line in bytes.
102 # 0 for unlimited
103 limit_request_line = 0
104
105 # Limit the number of HTTP headers fields in a request.
106 # By default this value is 100 and can't be larger than 32768.
107 limit_request_fields = 32768
108
109 # Limit the allowed size of an HTTP request header field.
110 # Value is a positive number or 0.
111 # Setting it to 0 will allow unlimited header field sizes.
112 limit_request_field_size = 0
113
114 # Timeout for graceful workers restart.
115 # After receiving a restart signal, workers have this much time to finish
116 # serving requests. Workers still alive after the timeout (starting from the
117 # receipt of the restart signal) are force killed.
118 # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
119 graceful_timeout = 21600
120
121 # The number of seconds to wait for requests on a Keep-Alive connection.
122 # Generally set in the 1-5 seconds range.
123 keepalive = 2
124
125 # Maximum memory usage that each worker can use before it will receive a
126 # graceful restart signal 0 = memory monitoring is disabled
127 # Examples: 268435456 (256MB), 536870912 (512MB)
128 # 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
129 # Dynamic formula 1024 * 1024 * 256 == 256MBs
130 memory_max_usage = 0
131
132 # How often in seconds to check for memory usage for each gunicorn worker
133 memory_usage_check_interval = 60
134
135 # Threshold below which we don't recycle the worker if garbage collection
136 # frees up enough resources. Before each restart we try to run GC on the worker;
137 # if enough memory is freed after that, the restart will not happen.
138 memory_usage_recovery_threshold = 0.8
139
140
141 @dataclasses.dataclass
142 class MemoryCheckConfig:
143 max_usage: int
144 check_interval: int
145 recovery_threshold: float
38 146
39 147
40 148 def _get_process_rss(pid=None):
41 149 try:
42 150 import psutil
43 151 if pid:
44 152 proc = psutil.Process(pid)
45 153 else:
46 154 proc = psutil.Process()
47 155 return proc.memory_info().rss
48 156 except Exception:
49 157 return None
50 158
51 159
52 160 def _get_config(ini_path):
161 import configparser
53 162
54 163 try:
55 import configparser
56 except ImportError:
57 import ConfigParser as configparser
58 try:
59 164 config = configparser.RawConfigParser()
60 165 config.read(ini_path)
61 166 return config
62 167 except Exception:
63 168 return None
64 169
65 170
66 def _time_with_offset(memory_usage_check_interval):
67 return time.time() - random.randint(0, memory_usage_check_interval/2.0)
171 def get_memory_usage_params(config=None):
172 # memory spec defaults
173 _memory_max_usage = memory_max_usage
174 _memory_usage_check_interval = memory_usage_check_interval
175 _memory_usage_recovery_threshold = memory_usage_recovery_threshold
176
177 if config:
178 ini_path = os.path.abspath(config)
179 conf = _get_config(ini_path)
180
181 section = 'server:main'
182 if conf and conf.has_section(section):
183
184 if conf.has_option(section, 'memory_max_usage'):
185 _memory_max_usage = conf.getint(section, 'memory_max_usage')
186
187 if conf.has_option(section, 'memory_usage_check_interval'):
188 _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval')
189
190 if conf.has_option(section, 'memory_usage_recovery_threshold'):
191 _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')
192
193 _memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
194 or _memory_max_usage)
195 _memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
196 or _memory_usage_check_interval)
197 _memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
198 or _memory_usage_recovery_threshold)
199
200 return MemoryCheckConfig(_memory_max_usage, _memory_usage_check_interval, _memory_usage_recovery_threshold)
201
202
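
As an illustration of the precedence implemented above (env vars win over the ini, which wins over the module defaults); values are examples only:

    import os

    os.environ['RC_GUNICORN_MEMORY_MAX_USAGE'] = str(512 * 1024 * 1024)  # 512MB cap
    params = get_memory_usage_params()   # env value overrides the module default (0)
    assert params.max_usage == 512 * 1024 * 1024
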
203 def _time_with_offset(check_interval):
204 return time.time() - random.randint(0, check_interval // 2)
68 205
69 206
70 207 def pre_fork(server, worker):
71 208 pass
72 209
73 210
74 211 def post_fork(server, worker):
75 212
76 # memory spec defaults
77 _memory_max_usage = 0
78 _memory_usage_check_interval = 60
79 _memory_usage_recovery_threshold = 0.8
80
81 ini_path = os.path.abspath(server.cfg.paste)
82 conf = _get_config(ini_path)
83
84 section = 'server:main'
85 if conf and conf.has_section(section):
213 memory_conf = get_memory_usage_params()
214 _memory_max_usage = memory_conf.max_usage
215 _memory_usage_check_interval = memory_conf.check_interval
216 _memory_usage_recovery_threshold = memory_conf.recovery_threshold
86 217
87 if conf.has_option(section, 'memory_max_usage'):
88 _memory_max_usage = conf.getint(section, 'memory_max_usage')
89
90 if conf.has_option(section, 'memory_usage_check_interval'):
91 _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval')
92
93 if conf.has_option(section, 'memory_usage_recovery_threshold'):
94 _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')
95
96 worker._memory_max_usage = _memory_max_usage
97 worker._memory_usage_check_interval = _memory_usage_check_interval
98 worker._memory_usage_recovery_threshold = _memory_usage_recovery_threshold
218 worker._memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
219 or _memory_max_usage)
220 worker._memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
221 or _memory_usage_check_interval)
222 worker._memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
223 or _memory_usage_recovery_threshold)
99 224
100 225 # register memory last check time, with some random offset so we don't recycle all
101 226 # at once
102 227 worker._last_memory_check_time = _time_with_offset(_memory_usage_check_interval)
103 228
104 229 if _memory_max_usage:
105 server.log.info("[%-10s] WORKER spawned with max memory set at %s", worker.pid,
230 server.log.info("pid=[%-10s] WORKER spawned with max memory set at %s", worker.pid,
106 231 _format_data_size(_memory_max_usage))
107 232 else:
108 server.log.info("[%-10s] WORKER spawned", worker.pid)
233 server.log.info("pid=[%-10s] WORKER spawned", worker.pid)
109 234
110 235
111 236 def pre_exec(server):
112 237 server.log.info("Forked child, re-executing.")
113 238
114 239
115 240 def on_starting(server):
116 241 server_lbl = '{} {}'.format(server.proc_name, server.address)
117 242 server.log.info("Server %s is starting.", server_lbl)
243 server.log.info('Config:')
244 server.log.info(f"\n{server.cfg}")
245 server.log.info(get_memory_usage_params())
118 246
119 247
120 248 def when_ready(server):
121 249 server.log.info("Server %s is ready. Spawning workers", server)
122 250
123 251
124 252 def on_reload(server):
125 253 pass
126 254
127 255
128 256 def _format_data_size(size, unit="B", precision=1, binary=True):
129 257 """Format a number using SI units (kilo, mega, etc.).
130 258
131 259 ``size``: The number as a float or int.
132 260
133 261 ``unit``: The unit name in plural form. Examples: "bytes", "B".
134 262
135 263 ``precision``: How many digits to the right of the decimal point. Default
136 264 is 1. 0 suppresses the decimal point.
137 265
138 266 ``binary``: If false, use base-10 decimal prefixes (kilo = K = 1000).
139 267 If true, use base-2 binary prefixes (kibi = Ki = 1024).
140 268
141 269 ``full_name``: If false (default), use the prefix abbreviation ("k" or
142 270 "Ki"). If true, use the full prefix ("kilo" or "kibi").
143 271
144 272
145 273 """
146 274
147 275 if not binary:
148 276 base = 1000
149 277 multiples = ('', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
150 278 else:
151 279 base = 1024
152 280 multiples = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi')
153 281
154 282 sign = ""
155 283 if size > 0:
156 284 m = int(math.log(size, base))
157 285 elif size < 0:
158 286 sign = "-"
159 287 size = -size
160 288 m = int(math.log(size, base))
161 289 else:
162 290 m = 0
163 291 if m > 8:
164 292 m = 8
165 293
166 294 if m == 0:
167 295 precision = '%.0f'
168 296 else:
169 297 precision = '%%.%df' % precision
170 298
171 299 size = precision % (size / math.pow(base, m))
172 300
173 301 return '%s%s %s%s' % (sign, size.strip(), multiples[m], unit)
174 302
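A few illustrative calls to _format_data_size, tracing the branches above:

_format_data_size(268435456)                # '256.0 MiB' (binary prefixes, base 1024)
_format_data_size(268435456, binary=False)  # '268.4 MB'  (decimal prefixes, base 1000)
_format_data_size(0)                        # '0 B'       (m == 0 suppresses decimals)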
175 303
176 304 def _check_memory_usage(worker):
177 memory_max_usage = worker._memory_max_usage
178 if not memory_max_usage:
305 _memory_max_usage = worker._memory_max_usage
306 if not _memory_max_usage:
179 307 return
180 308
181 memory_usage_check_interval = worker._memory_usage_check_interval
182 memory_usage_recovery_threshold = memory_max_usage * worker._memory_usage_recovery_threshold
309 _memory_usage_check_interval = worker._memory_usage_check_interval
310 _memory_usage_recovery_threshold = _memory_max_usage * worker._memory_usage_recovery_threshold
183 311
184 312 elapsed = time.time() - worker._last_memory_check_time
185 if elapsed > memory_usage_check_interval:
313 if elapsed > _memory_usage_check_interval:
186 314 mem_usage = _get_process_rss()
187 if mem_usage and mem_usage > memory_max_usage:
315 if mem_usage and mem_usage > _memory_max_usage:
188 316 worker.log.info(
189 317 "memory usage %s > %s, forcing gc",
190 _format_data_size(mem_usage), _format_data_size(memory_max_usage))
318 _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
191 319 # Try to clean it up by forcing a full collection.
192 320 gc.collect()
193 321 mem_usage = _get_process_rss()
194 if mem_usage > memory_usage_recovery_threshold:
322 if mem_usage and mem_usage > _memory_usage_recovery_threshold:
195 323 # Didn't clean up enough, we'll have to terminate.
196 324 worker.log.warning(
197 325 "memory usage %s > %s after gc, quitting",
198 _format_data_size(mem_usage), _format_data_size(memory_max_usage))
326 _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
199 327 # This will cause worker to auto-restart itself
200 328 worker.alive = False
201 329 worker._last_memory_check_time = time.time()
202 330
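A worked example of the thresholds above, assuming memory_max_usage = 1073741824 (1 GiB) and memory_usage_recovery_threshold = 0.8:

max_usage = 1073741824           # hard limit from the config
recovery = int(max_usage * 0.8)  # 858993459 bytes, roughly 819 MiB
# if worker RSS exceeds 1 GiB, gc.collect() runs; if RSS still exceeds
# ~819 MiB afterwards, worker.alive is set False and gunicorn respawns it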
203 331
204 332 def worker_int(worker):
205 worker.log.info("[%-10s] worker received INT or QUIT signal", worker.pid)
333 worker.log.info("pid=[%-10s] worker received INT or QUIT signal", worker.pid)
206 334
207 # get traceback info, on worker crash
208 id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
335 # get traceback info when a worker crashes
336 def get_thread_id(t_id):
337 id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
338 return id2name.get(t_id, "unknown_thread_id")
339
209 340 code = []
210 for thread_id, stack in sys._current_frames().items():
341 for thread_id, stack in sys._current_frames().items(): # noqa
211 342 code.append(
212 "\n# Thread: %s(%d)" % (id2name.get(thread_id, ""), thread_id))
343 "\n# Thread: %s(%d)" % (get_thread_id(thread_id), thread_id))
213 344 for fname, lineno, name, line in traceback.extract_stack(stack):
214 345 code.append('File: "%s", line %d, in %s' % (fname, lineno, name))
215 346 if line:
216 347 code.append(" %s" % (line.strip()))
217 348 worker.log.debug("\n".join(code))
218 349
219 350
220 351 def worker_abort(worker):
221 worker.log.info("[%-10s] worker received SIGABRT signal", worker.pid)
352 worker.log.info("pid=[%-10s] worker received SIGABRT signal", worker.pid)
222 353
223 354
224 355 def worker_exit(server, worker):
225 worker.log.info("[%-10s] worker exit", worker.pid)
356 worker.log.info("pid=[%-10s] worker exit", worker.pid)
226 357
227 358
228 359 def child_exit(server, worker):
229 worker.log.info("[%-10s] worker child exit", worker.pid)
360 worker.log.info("pid=[%-10s] worker child exit", worker.pid)
230 361
231 362
232 363 def pre_request(worker, req):
233 364 worker.start_time = time.time()
234 365 worker.log.debug(
235 366 "GNCRN PRE WORKER [cnt:%s]: %s %s", worker.nr, req.method, req.path)
236 367
237 368
238 369 def post_request(worker, req, environ, resp):
239 370 total_time = time.time() - worker.start_time
240 371 # Gunicorn sometimes has problems with reading the status_code
241 372 status_code = getattr(resp, 'status_code', '')
242 373 worker.log.debug(
243 374 "GNCRN POST WORKER [cnt:%s]: %s %s resp: %s, Load Time: %.4fs",
244 375 worker.nr, req.method, req.path, status_code, total_time)
245 376 _check_memory_usage(worker)
246 377
247 378
379 def _filter_proxy(ip):
380 """
381 IP addresses passed in headers can be a comma-separated list of multiple
382 IPs, appended by the various proxies in the request-processing chain,
383 with the left-most entry being the original client. We only care about
384 that first IP, the one from the original client.
385
386 :param ip: ip string from headers
387 """
388 if ',' in ip:
389 _ips = ip.split(',')
390 _first_ip = _ips[0].strip()
391 return _first_ip
392 return ip
393
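Illustrative behaviour of _filter_proxy on a proxied and a plain address:

_filter_proxy('203.0.113.7, 10.0.0.1, 10.0.0.2')  # -> '203.0.113.7' (left-most)
_filter_proxy('203.0.113.7')                      # -> '203.0.113.7' (unchanged)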
394
395 def _filter_port(ip):
396 """
397 Removes a port from an ip; there are 4 main cases to handle here.
398 - ipv4 eg. 127.0.0.1
399 - ipv6 eg. ::1
400 - ipv4+port eg. 127.0.0.1:8080
401 - ipv6+port eg. [::1]:8080
402
403 :param ip:
404 """
405 def is_ipv6(ip_addr):
406 if hasattr(socket, 'inet_pton'):
407 try:
408 socket.inet_pton(socket.AF_INET6, ip_addr)
409 except socket.error:
410 return False
411 else:
412 return False
413 return True
414
415 if ':' not in ip: # must be ipv4 pure ip
416 return ip
417
418 if '[' in ip and ']' in ip: # ipv6 with port
419 return ip.split(']')[0][1:].lower()
420
421 # must be ipv6 or ipv4 with port
422 if is_ipv6(ip):
423 return ip
424 else:
425 ip, _port = ip.split(':')[:2] # means ipv4+port
426 return ip
427
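The four cases from the docstring, traced through the branches above:

_filter_port('127.0.0.1')       # -> '127.0.0.1' (no colon: plain ipv4)
_filter_port('::1')             # -> '::1'       (valid ipv6, returned as-is)
_filter_port('127.0.0.1:8080')  # -> '127.0.0.1' (ipv4 + port)
_filter_port('[::1]:8080')      # -> '::1'       (bracketed ipv6 + port)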
428
429 def get_ip_addr(environ):
430 proxy_key = 'HTTP_X_REAL_IP'
431 proxy_key2 = 'HTTP_X_FORWARDED_FOR'
432 def_key = 'REMOTE_ADDR'
433
434 def _filters(x):
435 return _filter_port(_filter_proxy(x))
436
437 ip = environ.get(proxy_key)
438 if ip:
439 return _filters(ip)
440
441 ip = environ.get(proxy_key2)
442 if ip:
443 return _filters(ip)
444
445 ip = environ.get(def_key, '0.0.0.0')
446 return _filters(ip)
447
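A minimal sketch of the header precedence, using a hand-built WSGI environ:

environ = {
    'HTTP_X_FORWARDED_FOR': '203.0.113.7, 10.0.0.1',
    'REMOTE_ADDR': '10.0.0.1',
}
get_ip_addr(environ)  # -> '203.0.113.7'; HTTP_X_REAL_IP would win if it were set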
448
248 449 class RhodeCodeLogger(Logger):
249 450 """
250 451 Custom Logger that allows some customization that gunicorn doesn't allow
251 452 """
252 453
253 454 datefmt = r"%Y-%m-%d %H:%M:%S"
254 455
255 456 def __init__(self, cfg):
256 457 Logger.__init__(self, cfg)
257 458
258 459 def now(self):
259 460 """ return date in RhodeCode Log format """
260 461 now = time.time()
261 msecs = int((now - long(now)) * 1000)
462 msecs = int((now - int(now)) * 1000)
262 463 return time.strftime(self.datefmt, time.localtime(now)) + '.{0:03d}'.format(msecs)
263 464
465 def atoms(self, resp, req, environ, request_time):
466 """ Gets atoms for log formatting.
467 """
468 status = resp.status
469 if isinstance(status, str):
470 status = status.split(None, 1)[0]
471 atoms = {
472 'h': get_ip_addr(environ),
473 'l': '-',
474 'u': self._get_user(environ) or '-',
475 't': self.now(),
476 'r': "%s %s %s" % (environ['REQUEST_METHOD'],
477 environ['RAW_URI'],
478 environ["SERVER_PROTOCOL"]),
479 's': status,
480 'm': environ.get('REQUEST_METHOD'),
481 'U': environ.get('PATH_INFO'),
482 'q': environ.get('QUERY_STRING'),
483 'H': environ.get('SERVER_PROTOCOL'),
484 'b': getattr(resp, 'sent', None) is not None and str(resp.sent) or '-',
485 'B': getattr(resp, 'sent', None),
486 'f': environ.get('HTTP_REFERER', '-'),
487 'a': environ.get('HTTP_USER_AGENT', '-'),
488 'T': request_time.seconds,
489 'D': (request_time.seconds * 1000000) + request_time.microseconds,
490 'M': (request_time.seconds * 1000) + int(request_time.microseconds/1000),
491 'L': "%d.%06d" % (request_time.seconds, request_time.microseconds),
492 'p': "<%s>" % os.getpid()
493 }
494
495 # add request headers
496 if hasattr(req, 'headers'):
497 req_headers = req.headers
498 else:
499 req_headers = req
500
501 if hasattr(req_headers, "items"):
502 req_headers = req_headers.items()
503
504 atoms.update({"{%s}i" % k.lower(): v for k, v in req_headers})
505
506 resp_headers = resp.headers
507 if hasattr(resp_headers, "items"):
508 resp_headers = resp_headers.items()
509
510 # add response headers
511 atoms.update({"{%s}o" % k.lower(): v for k, v in resp_headers})
512
513 # add environ variables
514 environ_variables = environ.items()
515 atoms.update({"{%s}e" % k.lower(): v for k, v in environ_variables})
516
517 return atoms
518
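These atoms are consumed by gunicorn's access_log_format string. An illustrative setting (an assumption, not necessarily what ships by default) using only atoms produced above:

access_log_format = '%(t)s %(p)s %(h)s "%(r)s" %(s)s %(b)s "%(a)s"'

The custom {header}i / {header}o / {var}e atoms registered above can be referenced the same way, e.g. %({x-forwarded-for}i)s.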
264 519
265 520 logger_class = RhodeCodeLogger
@@ -1,209 +1,167 b''
1 ## -*- coding: utf-8 -*-
1 #
2 2
3 3 ; #################################
4 4 ; RHODECODE VCSSERVER CONFIGURATION
5 5 ; #################################
6 6
7 7 [server:main]
8 8 ; COMMON HOST/IP CONFIG
9 9 host = 127.0.0.1
10 port = 9900
10 port = 10010
11 11
12 12
13 13 ; ###########################
14 14 ; GUNICORN APPLICATION SERVER
15 15 ; ###########################
16 16
17 ; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini
17 ; run with gunicorn --paste rhodecode.ini
18 18
19 19 ; Module to use, this setting shouldn't be changed
20 20 use = egg:gunicorn#main
21 21
22 ; Sets the number of process workers. More workers means more concurrent connections
23 ; RhodeCode can handle at the same time. Each additional worker also increases
24 ; memory usage, as each has its own set of caches.
25 ; Recommended value is (2 * NUMBER_OF_CPUS + 1), e.g. 2 CPUs = 5 workers, but no more
26 ; than 8-10 except for really big deployments, e.g. 700-1000 users.
27 ; `instance_id = *` must be set in the [app:main] section below (which is the default)
28 ; when using more than 1 worker.
29 workers = 2
30
31 ; Gunicorn access log level
32 loglevel = info
33
34 ; Process name visible in process list
35 proc_name = rhodecode_vcsserver
36
37 ; Type of worker class, one of `sync`, `gevent`
38 ; currently `sync` is the only option allowed.
39 worker_class = sync
40
41 ; The maximum number of simultaneous clients. Valid only for gevent
42 worker_connections = 10
43
44 ; Max number of requests that worker will handle before being gracefully restarted.
45 ; Prevents memory leaks; jitter adds variability so not all workers are restarted at once.
46 max_requests = 1000
47 max_requests_jitter = 30
48
49 ; Amount of time a worker can spend handling a request before it
50 ; gets killed and restarted. By default set to 21600 (6hrs)
51 ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
52 timeout = 21600
53
54 ; The maximum size of HTTP request line in bytes.
55 ; 0 for unlimited
56 limit_request_line = 0
57
58 ; Limit the number of HTTP headers fields in a request.
59 ; By default this value is 100 and can't be larger than 32768.
60 limit_request_fields = 32768
61
62 ; Limit the allowed size of an HTTP request header field.
63 ; Value is a positive number or 0.
64 ; Setting it to 0 will allow unlimited header field sizes.
65 limit_request_field_size = 0
66
67 ; Timeout for graceful workers restart.
68 ; After receiving a restart signal, workers have this much time to finish
69 ; serving requests. Workers still alive after the timeout (starting from the
70 ; receipt of the restart signal) are force killed.
71 ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
72 graceful_timeout = 3600
73
74 # The number of seconds to wait for requests on a Keep-Alive connection.
75 # Generally set in the 1-5 seconds range.
76 keepalive = 2
77
78 ; Maximum memory usage that each worker can use before it will receive a
79 ; graceful restart signal. 0 = memory monitoring is disabled
80 ; Examples: 268435456 (256MB), 536870912 (512MB)
81 ; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
82 memory_max_usage = 0
83
84 ; How often in seconds to check for memory usage for each gunicorn worker
85 memory_usage_check_interval = 60
86
87 ; Threshold value below which we don't recycle a worker if GarbageCollection
88 ; frees up enough resources. Before each restart we try to run GC on the worker;
89 ; if that frees up enough memory, the restart will not happen.
90 memory_usage_recovery_threshold = 0.8
91
92
93 22 [app:main]
94 23 ; The %(here)s variable will be replaced with the absolute path of parent directory
95 24 ; of this file
25 ; Each option in the app:main section can be overridden by an environment variable
26 ;
27 ;To override an option:
28 ;
29 ;RC_<KeyName>
30 ;Everything should be uppercase, . and - should be replaced by _.
31 ;For example, if you have these configuration settings:
32 ;rc_cache.repo_object.backend = foo
33 ;can be overridden by
34 ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
35
96 36 use = egg:rhodecode-vcsserver
97 37
98 38 ; Pyramid default locales, we need this to be set
99 pyramid.default_locale_name = en
39 #pyramid.default_locale_name = en
100 40
101 41 ; default locale used by VCS systems
102 locale = en_US.UTF-8
42 #locale = en_US.UTF-8
103 43
104 44 ; path to binaries for vcsserver, it should be set by the installer
105 ; at installation time, e.g /home/user/vcsserver-1/profile/bin
45 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
106 46 ; it can also be a path to nix-build output in case of development
107 47 core.binary_dir = ""
108 48
109 49 ; Custom exception store path, defaults to TMPDIR
110 50 ; This is used to store exception from RhodeCode in shared directory
111 51 #exception_tracker.store_path =
112 52
113 53 ; #############
114 54 ; DOGPILE CACHE
115 55 ; #############
116 56
117 57 ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
118 58 ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
119 cache_dir = %(here)s/data
59 #cache_dir = %(here)s/data
120 60
121 61 ; ***************************************
122 62 ; `repo_object` cache, default file based
123 63 ; ***************************************
124 64
125 65 ; `repo_object` cache settings for vcs methods for repositories
126 rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
66 #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
127 67
128 68 ; cache auto-expires after N seconds
129 69 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
130 rc_cache.repo_object.expiration_time = 2592000
70 #rc_cache.repo_object.expiration_time = 2592000
131 71
132 72 ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
133 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache.db
73 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
134 74
135 75 ; ***********************************************************
136 76 ; `repo_object` cache with redis backend
137 77 ; recommended for larger instance, and for better performance
138 78 ; ***********************************************************
139 79
140 80 ; `repo_object` cache settings for vcs methods for repositories
141 81 #rc_cache.repo_object.backend = dogpile.cache.rc.redis_msgpack
142 82
143 83 ; cache auto-expires after N seconds
144 84 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
145 85 #rc_cache.repo_object.expiration_time = 2592000
146 86
147 87 ; redis_expiration_time needs to be greater then expiration_time
148 88 #rc_cache.repo_object.arguments.redis_expiration_time = 3592000
149 89
150 90 #rc_cache.repo_object.arguments.host = localhost
151 91 #rc_cache.repo_object.arguments.port = 6379
152 92 #rc_cache.repo_object.arguments.db = 5
153 93 #rc_cache.repo_object.arguments.socket_timeout = 30
154 94 ; more Redis options: https://dogpilecache.sqlalchemy.org/en/latest/api.html#redis-backends
155 95 #rc_cache.repo_object.arguments.distributed_lock = true
156 96
157 97 ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
158 98 #rc_cache.repo_object.arguments.lock_auto_renewal = true
159 99
160 ; Statsd client config
100 ; Statsd client config; this is used to send metrics to statsd
101 ; We recommend setting statsd_exported and scraping the metrics using Prometheus
161 102 #statsd.enabled = false
162 103 #statsd.statsd_host = 0.0.0.0
163 104 #statsd.statsd_port = 8125
164 105 #statsd.statsd_prefix =
165 106 #statsd.statsd_ipv6 = false
166 107
108 ; configure logging automatically at server startup; set to false
109 ; to use the custom logging config below.
110 ; RC_LOGGING_FORMATTER
111 ; RC_LOGGING_LEVEL
112 ; env variables can control the settings for logging in case of autoconfigure
113
114 #logging.autoconfigure = true
115
116 ; specify your own custom logging config file to configure logging
117 #logging.logging_conf_file = /path/to/custom_logging.ini
118
167 119 ; #####################
168 120 ; LOGGING CONFIGURATION
169 121 ; #####################
122
170 123 [loggers]
171 124 keys = root, vcsserver
172 125
173 126 [handlers]
174 127 keys = console
175 128
176 129 [formatters]
177 keys = generic
130 keys = generic, json
178 131
179 132 ; #######
180 133 ; LOGGERS
181 134 ; #######
182 135 [logger_root]
183 136 level = NOTSET
184 137 handlers = console
185 138
186 139 [logger_vcsserver]
187 level = DEBUG
140 level = INFO
188 141 handlers =
189 142 qualname = vcsserver
190 143 propagate = 1
191 144
192
193 145 ; ########
194 146 ; HANDLERS
195 147 ; ########
196 148
197 149 [handler_console]
198 150 class = StreamHandler
199 151 args = (sys.stderr, )
200 152 level = INFO
153 ; To enable JSON formatted logs replace 'generic' with 'json'
154 ; This allows sending properly formatted logs to grafana loki or elasticsearch
201 155 formatter = generic
202 156
203 157 ; ##########
204 158 ; FORMATTERS
205 159 ; ##########
206 160
207 161 [formatter_generic]
208 162 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
209 163 datefmt = %Y-%m-%d %H:%M:%S
164
165 [formatter_json]
166 format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
167 class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
@@ -1,57 +1,56 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import socket
19
20 19 import pytest
21 20
22 21
23 22 def pytest_addoption(parser):
24 23 parser.addoption(
25 '--repeat', type=int, default=100,
24 '--perf-repeat-vcs', type=int, default=100,
26 25 help="Number of repetitions in performance tests.")
27 26
28 27
29 28 @pytest.fixture(scope='session')
30 29 def repeat(request):
31 30 """
32 31 The number of repetitions is based on this fixture.
33 32
34 33 Slower calls may divide it by 10 or 100. It is chosen in a way so that the
35 34 tests are not too slow in our default test suite.
36 35 """
37 return request.config.getoption('--repeat')
36 return request.config.getoption('--perf-repeat-vcs')
38 37
39 38
40 39 @pytest.fixture(scope='session')
41 40 def vcsserver_port(request):
42 41 port = get_available_port()
43 print('Using vcsserver port %s' % (port, ))
42 print(f'Using vcsserver port {port}')
44 43 return port
45 44
46 45
47 46 def get_available_port():
48 47 family = socket.AF_INET
49 48 socktype = socket.SOCK_STREAM
50 49 host = '127.0.0.1'
51 50
52 51 mysocket = socket.socket(family, socktype)
53 52 mysocket.bind((host, 0))
54 53 port = mysocket.getsockname()[1]
55 54 mysocket.close()
56 55 del mysocket
57 56 return port
@@ -1,48 +1,77 b''
1 ## dependencies
2
3 # our custom configobj
4 https://code.rhodecode.com/upstream/configobj/artifacts/download/0-012de99a-b1e1-4f64-a5c0-07a98a41b324.tar.gz?md5=6a513f51fe04b2c18cf84c1395a7c626#egg=configobj==5.0.6
5
6 dogpile.cache==0.9.0
7 dogpile.core==0.4.1
8 decorator==4.1.2
9 dulwich==0.13.0
10 hgsubversion==1.9.3
11 hg-evolve==9.1.0
12 mako==1.1.0
13 markupsafe==1.1.1
14 mercurial==5.1.1
15 msgpack-python==0.5.6
16
17 pastedeploy==2.1.0
18 pyramid==1.10.4
19 pyramid-mako==1.1.0
20 pygit2==0.28.2
1 # deps, generated via pipdeptree --exclude setuptools,wheel,pipdeptree,pip -f | tr '[:upper:]' '[:lower:]'
21 2
3 async-timeout==4.0.3
4 atomicwrites==1.4.1
5 celery==5.3.6
6 billiard==4.2.0
7 click==8.1.3
8 click-didyoumean==0.3.0
9 click==8.1.3
10 click-plugins==1.1.1
11 click==8.1.3
12 click-repl==0.2.0
13 click==8.1.3
14 prompt-toolkit==3.0.38
15 wcwidth==0.2.6
16 six==1.16.0
17 kombu==5.3.5
18 amqp==5.2.0
19 vine==5.1.0
20 vine==5.1.0
21 python-dateutil==2.8.2
22 six==1.16.0
23 tzdata==2023.4
24 vine==5.1.0
25 contextlib2==21.6.0
26 cov-core==1.15.0
27 coverage==7.2.3
28 diskcache==5.6.3
29 dogpile.cache==1.3.0
30 decorator==5.1.1
31 stevedore==5.1.0
32 pbr==5.11.1
33 dulwich==0.21.6
34 urllib3==1.26.14
35 gunicorn==21.2.0
36 packaging==23.1
37 hg-evolve==11.0.2
38 importlib-metadata==6.0.0
39 zipp==3.15.0
40 mercurial==6.3.3
41 mock==5.0.2
42 more-itertools==9.1.0
43 msgpack==1.0.7
44 orjson==3.9.13
45 psutil==5.9.8
46 py==1.11.0
47 pygit2==1.13.3
48 cffi==1.16.0
49 pycparser==2.21
50 pygments==2.15.1
51 pyparsing==3.1.1
52 pyramid==2.0.2
53 hupper==1.12
54 plaster==1.1.2
55 plaster-pastedeploy==1.0.1
56 pastedeploy==3.1.0
57 plaster==1.1.2
58 translationstring==1.4
59 venusian==3.0.0
60 webob==1.8.7
61 zope.deprecation==5.0.0
62 zope.interface==6.1.0
63 redis==5.0.1
64 async-timeout==4.0.3
22 65 repoze.lru==0.7
23 redis==3.5.3
24 simplejson==3.16.0
25 subprocess32==3.5.4
26 subvertpy==0.10.1
66 scandir==1.10.0
67 setproctitle==1.3.3
68 subvertpy==0.11.0
69 waitress==3.0.0
70 wcwidth==0.2.6
27 71
28 six==1.11.0
29 translationstring==1.3
30 webob==1.8.5
31 zope.deprecation==4.4.0
32 zope.interface==4.6.0
33
34 ## http servers
35 gevent==1.5.0
36 greenlet==0.4.15
37 gunicorn==19.9.0
38 waitress==1.3.1
39
40 ## debug
41 ipdb==0.13.2
42 ipython==5.1.0
43 72
44 73 ## test related requirements
45 -r requirements_test.txt
74 #-r requirements_test.txt
46 75
47 76 ## uncomment to add the debug libraries
48 77 #-r requirements_debug.txt
@@ -1,8 +1,28 b''
1 1 ## special libraries we could extend the requirements.txt file with to add some
2 ## custom libraries useful for debug and memory tracing
3
4 ## uncomment inclusion of this file in requirements.txt run make generate-pkgs and nix-shell
2 ## custom libraries useful for debug and memory tracing
5 3
6 4 objgraph
7 5 memory-profiler
8 6 pympler
7
8 ## debug
9 ipdb
10 ipython
11 rich
12
13 # format
14 flake8
15 ruff
16
17 pipdeptree==2.7.1
18 invoke==2.0.0
19 bumpversion==0.6.0
20 bump2version==1.0.1
21
22 docutils-stubs
23 types-redis
24 types-requests==2.31.0.6
25 types-sqlalchemy
26 types-psutil
27 types-pycurl
28 types-ujson
@@ -1,16 +1,45 b''
1 1 # test related requirements
2 pytest==4.6.5
3 py==1.8.0
4 pytest-cov==2.7.1
5 pytest-sugar==0.9.2
6 pytest-runner==5.1.0
2
3 cov-core==1.15.0
4 coverage==7.2.3
5 mock==5.0.2
6 py==1.11.0
7 pytest-cov==4.0.0
8 coverage==7.2.3
9 pytest==7.3.1
10 attrs==22.2.0
11 iniconfig==2.0.0
12 packaging==23.1
13 pluggy==1.0.0
7 14 pytest-profiling==1.7.0
8 pytest-timeout==1.3.3
9 gprof2dot==2017.9.19
15 gprof2dot==2022.7.29
16 pytest==7.3.1
17 attrs==22.2.0
18 iniconfig==2.0.0
19 packaging==23.1
20 pluggy==1.0.0
21 six==1.16.0
22 pytest-runner==6.0.0
23 pytest-sugar==0.9.7
24 packaging==23.1
25 pytest==7.3.1
26 attrs==22.2.0
27 iniconfig==2.0.0
28 packaging==23.1
29 pluggy==1.0.0
30 termcolor==2.3.0
31 pytest-timeout==2.1.0
32 pytest==7.3.1
33 attrs==22.2.0
34 iniconfig==2.0.0
35 packaging==23.1
36 pluggy==1.0.0
37 webtest==3.0.0
38 beautifulsoup4==4.11.2
39 soupsieve==2.4
40 waitress==3.0.0
41 webob==1.8.7
10 42
11 mock==3.0.5
12 cov-core==1.15.0
13 coverage==4.5.4
14
15 webtest==2.0.34
16 beautifulsoup4==4.6.3
43 # RhodeCode test-data
44 rc_testdata @ https://code.rhodecode.com/upstream/rc-testdata-dist/raw/77378e9097f700b4c1b9391b56199fe63566b5c9/rc_testdata-0.11.0.tar.gz#egg=rc_testdata
45 rc_testdata==0.11.0
@@ -1,1 +1,1 b''
1 4.27.1 No newline at end of file
1 5.0.0 No newline at end of file
@@ -1,28 +1,41 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 import pkgutil
18 import os
19
20 __version__ = ''
19 21
20 22
21 __version__ = pkgutil.get_data('vcsserver', 'VERSION').strip()
23 def get_version():
24 global __version__
25 if __version__:
26 return __version__
27
28 here = os.path.abspath(os.path.dirname(__file__))
29 ver_file = os.path.join(here, "VERSION")
30 with open(ver_file, "rt") as f:
31 version = f.read().strip()
32
33 __version__ = version
34 return version
22 35
23 36 # link to config for pyramid
24 37 CONFIG = {}
25 38
26 39 # Populated with the settings dictionary from application init in
27 40 #
28 41 PYRAMID_SETTINGS = {}
@@ -1,130 +1,193 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import os
18 18 import sys
19 import traceback
19 import tempfile
20 20 import logging
21 import urlparse
21 import urllib.parse
22
23 from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store
22 24
23 25 from vcsserver import exceptions
24 26 from vcsserver.exceptions import NoContentException
25 from vcsserver.hgcompat import (archival)
26
27 from vcsserver.hgcompat import archival
28 from vcsserver.str_utils import safe_bytes
29 from vcsserver.lib.exc_tracking import format_exc
27 30 log = logging.getLogger(__name__)
28 31
29 32
30 class RepoFactory(object):
33 class RepoFactory:
31 34 """
32 35 Utility to create instances of repository
33 36
34 37 It provides internal caching of the `repo` object based on
35 38 the :term:`call context`.
36 39 """
37 40 repo_type = None
38 41
39 42 def __init__(self):
40 43 pass
41 44
42 45 def _create_config(self, path, config):
43 46 config = {}
44 47 return config
45 48
46 49 def _create_repo(self, wire, create):
47 50 raise NotImplementedError()
48 51
49 52 def repo(self, wire, create=False):
50 53 raise NotImplementedError()
51 54
52 55
53 56 def obfuscate_qs(query_string):
54 57 if query_string is None:
55 58 return None
56 59
57 60 parsed = []
58 for k, v in urlparse.parse_qsl(query_string, keep_blank_values=True):
61 for k, v in urllib.parse.parse_qsl(query_string, keep_blank_values=True):
59 62 if k in ['auth_token', 'api_key']:
60 63 v = "*****"
61 64 parsed.append((k, v))
62 65
63 66 return '&'.join('{}{}'.format(
64 k, '={}'.format(v) if v else '') for k, v in parsed)
67 k, f'={v}' if v else '') for k, v in parsed)
65 68
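Illustrative behaviour of obfuscate_qs on a query string carrying a token:

obfuscate_qs('auth_token=secret123&cmd=pull')  # -> 'auth_token=*****&cmd=pull'
obfuscate_qs(None)                             # -> None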
66 69
67 def raise_from_original(new_type):
70 def raise_from_original(new_type, org_exc: Exception):
68 71 """
69 72 Raise a new exception type with original args and traceback.
70 73 """
71 exc_type, exc_value, exc_traceback = sys.exc_info()
74 exc_info = sys.exc_info()
75 exc_type, exc_value, exc_traceback = exc_info
72 76 new_exc = new_type(*exc_value.args)
77
73 78 # store the original traceback into the new exc
74 new_exc._org_exc_tb = traceback.format_exc(exc_traceback)
79 new_exc._org_exc_tb = format_exc(exc_info)
75 80
76 81 try:
77 raise new_exc, None, exc_traceback
82 raise new_exc.with_traceback(exc_traceback)
78 83 finally:
79 84 del exc_traceback
80 85
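A usage sketch for raise_from_original; load_repo is a hypothetical backend call, while LookupException is the factory from vcsserver.exceptions:

from vcsserver import exceptions

try:
    repo = load_repo(path)  # hypothetical call that may raise KeyError
except KeyError as e:
    raise_from_original(exceptions.LookupException(e), e)
    # the re-raised exception carries the original args and traceback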
81 86
82 class ArchiveNode(object):
87 class ArchiveNode:
83 88 def __init__(self, path, mode, is_link, raw_bytes):
84 89 self.path = path
85 90 self.mode = mode
86 91 self.is_link = is_link
87 92 self.raw_bytes = raw_bytes
88 93
89 94
90 def archive_repo(walker, archive_dest_path, kind, mtime, archive_at_path,
91 archive_dir_name, commit_id, write_metadata=True, extra_metadata=None):
95 def store_archive_in_cache(node_walker, archive_key, kind, mtime, archive_at_path, archive_dir_name,
96 commit_id, write_metadata=True, extra_metadata=None, cache_config=None):
92 97 """
93 walker should be a file walker, for example:
94 def walker():
98 Function that generates an archive and stores it in a dedicated backend store.
99 Here we use diskcache.
100
101 :param node_walker: a generator returning nodes to add to archive
102 :param archive_key: key used to store the path
103 :param kind: archive kind
104 :param mtime: time of creation
105 :param archive_at_path: default '/'; the path at which the archive was started.
106 If this is not '/' it means it's a partial archive
107 :param archive_dir_name: the inner directory name used when creating the archive
108 :param commit_id: commit sha of the revision the archive was created at
109 :param write_metadata:
110 :param extra_metadata:
111 :param cache_config:
112
113 node_walker should be a file walker, for example:
114 def node_walker():
95 115 for file_info in files:
96 116 yield ArchiveNode(fn, mode, is_link, ctx[fn].data)
97 117 """
98 118 extra_metadata = extra_metadata or {}
99 119
120 d_cache = get_archival_cache_store(config=cache_config)
121
122 if archive_key in d_cache:
123 with d_cache as d_cache_reader:
124 reader, tag = d_cache_reader.get(archive_key, read=True, tag=True, retry=True)
125 return reader.name
126
127 archive_tmp_path = safe_bytes(tempfile.mkstemp()[1])
128 log.debug('Creating new temp archive in %s', archive_tmp_path)
129
100 130 if kind == "tgz":
101 archiver = archival.tarit(archive_dest_path, mtime, "gz")
131 archiver = archival.tarit(archive_tmp_path, mtime, b"gz")
102 132 elif kind == "tbz2":
103 archiver = archival.tarit(archive_dest_path, mtime, "bz2")
133 archiver = archival.tarit(archive_tmp_path, mtime, b"bz2")
104 134 elif kind == 'zip':
105 archiver = archival.zipit(archive_dest_path, mtime)
135 archiver = archival.zipit(archive_tmp_path, mtime)
106 136 else:
107 137 raise exceptions.ArchiveException()(
108 'Remote does not support: "%s" archive type.' % kind)
138 f'Remote does not support: "{kind}" archive type.')
109 139
110 for f in walker(commit_id, archive_at_path):
111 f_path = os.path.join(archive_dir_name, f.path.lstrip('/'))
140 for f in node_walker(commit_id, archive_at_path):
141 f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
112 142 try:
113 143 archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
114 144 except NoContentException:
115 145 # NOTE(marcink): this is a special case for SVN so we can create "empty"
116 # directories which arent supported by archiver
117 archiver.addfile(os.path.join(f_path, '.dir'), f.mode, f.is_link, '')
146 # directories which are not supported by archiver
147 archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')
118 148
119 149 if write_metadata:
120 150 metadata = dict([
121 151 ('commit_id', commit_id),
122 152 ('mtime', mtime),
123 153 ])
124 154 metadata.update(extra_metadata)
125 155
126 meta = ["%s:%s" % (f_name, value) for f_name, value in metadata.items()]
127 f_path = os.path.join(archive_dir_name, '.archival.txt')
128 archiver.addfile(f_path, 0o644, False, '\n'.join(meta))
156 meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
157 f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
158 archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))
159
160 archiver.done()
161
162 # ensure set & get are atomic
163 with d_cache.transact():
164
165 with open(archive_tmp_path, 'rb') as archive_file:
166 add_result = d_cache.set(archive_key, archive_file, read=True, tag='db-name', retry=True)
167 if not add_result:
168 log.error('Failed to store cache for key=%s', archive_key)
169
170 os.remove(archive_tmp_path)
129 171
130 return archiver.done()
172 reader, tag = d_cache.get(archive_key, read=True, tag=True, retry=True)
173 if not reader:
174 raise AssertionError(f'empty reader on key={archive_key} added={add_result}')
175
176 return reader.name
177
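A minimal sketch of the node_walker generator this function expects; files and ctx are hypothetical stand-ins for the backend objects:

def node_walker(commit_id, archive_at_path):
    for fn, mode, is_link in files:  # hypothetical file listing
        # raw_bytes must be callable: it is invoked as f.raw_bytes() above
        yield ArchiveNode(fn, mode, is_link, lambda fn=fn: ctx[fn].data)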
178
179 class BinaryEnvelope:
180 def __init__(self, val):
181 self.val = val
182
183
184 class BytesEnvelope(bytes):
185 def __new__(cls, content):
186 if isinstance(content, bytes):
187 return super().__new__(cls, content)
188 else:
189 raise TypeError('BytesEnvelope content= param must be bytes. Use BinaryEnvelope to wrap other types')
190
191
192 class BinaryBytesEnvelope(BytesEnvelope):
193 pass
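Illustrative behaviour of the envelope types above:

BytesEnvelope(b'payload')  # ok: behaves like plain bytes
BytesEnvelope('payload')   # raises TypeError; wrap non-bytes in BinaryEnvelope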
@@ -1,8 +1,10 b''
1 # Copyright (C) 2014-2023 RhodeCode GmbH
2
1 3 """
2 4 Provides a stub implementation for VCS operations.
3 5
4 6 Intended usage is to help in performance measurements. The basic idea is to
5 7 implement an `EchoApp` which sends back what it gets. Based on a configuration
6 8 parameter this app can be activated, so that it replaced the endpoints for Git
7 9 and Mercurial.
8 10 """
@@ -1,54 +1,56 b''
1 # Copyright (C) 2014-2023 RhodeCode GmbH
2
1 3 """
2 4 Implementation of :class:`EchoApp`.
3 5
4 6 This WSGI application will just echo back the data which it receives.
5 7 """
6 8
7 9 import logging
8 10
9 11
10 12 log = logging.getLogger(__name__)
11 13
12 14
13 class EchoApp(object):
15 class EchoApp:
14 16
15 17 def __init__(self, repo_path, repo_name, config):
16 18 self._repo_path = repo_path
17 19 log.info("EchoApp initialized for %s", repo_path)
18 20
19 21 def __call__(self, environ, start_response):
20 22 log.debug("EchoApp called for %s", self._repo_path)
21 23 log.debug("Content-Length: %s", environ.get('CONTENT_LENGTH'))
22 24 environ['wsgi.input'].read()
23 25 status = '200 OK'
24 26 headers = [('Content-Type', 'text/plain')]
25 27 start_response(status, headers)
26 return ["ECHO"]
28 return [b"ECHO"]
27 29
28 30
29 class EchoAppStream(object):
31 class EchoAppStream:
30 32
31 33 def __init__(self, repo_path, repo_name, config):
32 34 self._repo_path = repo_path
33 35 log.info("EchoApp initialized for %s", repo_path)
34 36
35 37 def __call__(self, environ, start_response):
36 38 log.debug("EchoApp called for %s", self._repo_path)
37 39 log.debug("Content-Length: %s", environ.get('CONTENT_LENGTH'))
38 40 environ['wsgi.input'].read()
39 41 status = '200 OK'
40 42 headers = [('Content-Type', 'text/plain')]
41 43 start_response(status, headers)
42 44
43 45 def generator():
44 for _ in xrange(1000000):
45 yield "ECHO"
46 for _ in range(1000000):
47 yield b"ECHO_STREAM"
46 48 return generator()
47 49
48 50
49 51 def create_app():
50 52 """
51 53 Allows to run this app directly in a WSGI server.
52 54 """
53 55 stub_config = {}
54 56 return EchoApp('stub_path', 'stub_name', stub_config)
@@ -1,45 +1,47 b''
1 # Copyright (C) 2014-2023 RhodeCode GmbH
2
1 3 """
2 4 Provides the same API as :mod:`remote_wsgi`.
3 5
4 6 Uses the `EchoApp` instead of real implementations.
5 7 """
6 8
7 9 import logging
8 10
9 11 from .echo_app import EchoApp
10 12 from vcsserver import wsgi_app_caller
11 13
12 14
13 15 log = logging.getLogger(__name__)
14 16
15 17
16 class GitRemoteWsgi(object):
18 class GitRemoteWsgi:
17 19 def handle(self, environ, input_data, *args, **kwargs):
18 20 app = wsgi_app_caller.WSGIAppCaller(
19 21 create_echo_wsgi_app(*args, **kwargs))
20 22
21 23 return app.handle(environ, input_data)
22 24
23 25
24 class HgRemoteWsgi(object):
26 class HgRemoteWsgi:
25 27 def handle(self, environ, input_data, *args, **kwargs):
26 28 app = wsgi_app_caller.WSGIAppCaller(
27 29 create_echo_wsgi_app(*args, **kwargs))
28 30
29 31 return app.handle(environ, input_data)
30 32
31 33
32 34 def create_echo_wsgi_app(repo_path, repo_name, config):
33 35 log.debug("Creating EchoApp WSGI application")
34 36
35 37 _assert_valid_config(config)
36 38
37 39 # Remaining items are forwarded to have the extras available
38 40 return EchoApp(repo_path, repo_name, config=config)
39 41
40 42
41 43 def _assert_valid_config(config):
42 44 config = config.copy()
43 45
44 46 # This is what git needs from config at this stage
45 47 config.pop('git_update_server_info')
@@ -1,125 +1,125 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """
19 19 Special exception handling over the wire.
20 20
21 21 Since we cannot assume that our client is able to import our exception classes,
22 22 this module provides a "wrapping" mechanism to raise plain exceptions
23 23 which contain an extra attribute `_vcs_kind` to allow a client to distinguish
24 24 different error conditions.
25 25 """
26 26
27 27 from pyramid.httpexceptions import HTTPLocked, HTTPForbidden
28 28
29 29
30 30 def _make_exception(kind, org_exc, *args):
31 31 """
32 32 Prepares a base `Exception` instance to be sent over the wire.
33 33
34 34 To give our caller a hint what this is about, it will attach an attribute
35 35 `_vcs_kind` to the exception.
36 36 """
37 37 exc = Exception(*args)
38 38 exc._vcs_kind = kind
39 39 exc._org_exc = org_exc
40 40 exc._org_exc_tb = getattr(org_exc, '_org_exc_tb', '')
41 41 return exc
42 42
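On the client side the wrapped error can be recognised without importing any vcsserver classes. A sketch with a hypothetical remote_call:

try:
    remote_call()  # hypothetical call crossing the wire
except Exception as e:
    if getattr(e, '_vcs_kind', None) == 'repo_locked':
        ...  # handle the locked-repository case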
43 43
44 44 def AbortException(org_exc=None):
45 45 def _make_exception_wrapper(*args):
46 46 return _make_exception('abort', org_exc, *args)
47 47 return _make_exception_wrapper
48 48
49 49
50 50 def ArchiveException(org_exc=None):
51 51 def _make_exception_wrapper(*args):
52 52 return _make_exception('archive', org_exc, *args)
53 53 return _make_exception_wrapper
54 54
55 55
56 56 def LookupException(org_exc=None):
57 57 def _make_exception_wrapper(*args):
58 58 return _make_exception('lookup', org_exc, *args)
59 59 return _make_exception_wrapper
60 60
61 61
62 62 def VcsException(org_exc=None):
63 63 def _make_exception_wrapper(*args):
64 64 return _make_exception('error', org_exc, *args)
65 65 return _make_exception_wrapper
66 66
67 67
68 68 def RepositoryLockedException(org_exc=None):
69 69 def _make_exception_wrapper(*args):
70 70 return _make_exception('repo_locked', org_exc, *args)
71 71 return _make_exception_wrapper
72 72
73 73
74 74 def RepositoryBranchProtectedException(org_exc=None):
75 75 def _make_exception_wrapper(*args):
76 76 return _make_exception('repo_branch_protected', org_exc, *args)
77 77 return _make_exception_wrapper
78 78
79 79
80 80 def RequirementException(org_exc=None):
81 81 def _make_exception_wrapper(*args):
82 82 return _make_exception('requirement', org_exc, *args)
83 83 return _make_exception_wrapper
84 84
85 85
86 86 def UnhandledException(org_exc=None):
87 87 def _make_exception_wrapper(*args):
88 88 return _make_exception('unhandled', org_exc, *args)
89 89 return _make_exception_wrapper
90 90
91 91
92 92 def URLError(org_exc=None):
93 93 def _make_exception_wrapper(*args):
94 94 return _make_exception('url_error', org_exc, *args)
95 95 return _make_exception_wrapper
96 96
97 97
98 98 def SubrepoMergeException(org_exc=None):
99 99 def _make_exception_wrapper(*args):
100 100 return _make_exception('subrepo_merge_error', org_exc, *args)
101 101 return _make_exception_wrapper
102 102
103 103
104 104 class HTTPRepoLocked(HTTPLocked):
105 105 """
106 106 Subclass of HTTPLocked response that allows setting the title and status
107 107 code via constructor arguments.
108 108 """
109 109 def __init__(self, title, status_code=None, **kwargs):
110 110 self.code = status_code or HTTPLocked.code
111 111 self.title = title
112 super(HTTPRepoLocked, self).__init__(**kwargs)
112 super().__init__(**kwargs)
113 113
114 114
115 115 class HTTPRepoBranchProtected(HTTPForbidden):
116 116 def __init__(self, *args, **kwargs):
117 117 super(HTTPForbidden, self).__init__(*args, **kwargs)
118 118
119 119
120 120 class RefNotFoundException(KeyError):
121 121 pass
122 122
123 123
124 124 class NoContentException(ValueError):
125 125 pass
@@ -1,19 +1,19 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 from app import create_app
19 from .app import create_app # noqa
@@ -1,292 +1,296 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import re
19 19 import logging
20 from wsgiref.util import FileWrapper
21 20
22 import simplejson as json
23 21 from pyramid.config import Configurator
24 22 from pyramid.response import Response, FileIter
25 23 from pyramid.httpexceptions import (
26 24 HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden,
27 25 HTTPUnprocessableEntity)
28 26
27 from vcsserver.lib.rc_json import json
29 28 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
30 29 from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator
31 from vcsserver.utils import safe_int
30 from vcsserver.str_utils import safe_int
32 31
33 32 log = logging.getLogger(__name__)
34 33
35 34
36 GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' #+json ?
35 GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' # +json ?
37 36 GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))')
38 37
39 38
40 39 def write_response_error(http_exception, text=None):
41 40 content_type = GIT_LFS_CONTENT_TYPE + '+json'
42 41 _exception = http_exception(content_type=content_type)
43 42 _exception.content_type = content_type
44 43 if text:
45 44 _exception.body = json.dumps({'message': text})
46 45 log.debug('LFS: writing response of type %s to client with text:%s',
47 46 http_exception, text)
48 47 return _exception
49 48
50 49
51 class AuthHeaderRequired(object):
50 class AuthHeaderRequired:
52 51 """
53 52 Decorator to check if request has proper auth-header
54 53 """
55 54
56 55 def __call__(self, func):
57 56 return get_cython_compat_decorator(self.__wrapper, func)
58 57
59 58 def __wrapper(self, func, *fargs, **fkwargs):
60 59 request = fargs[1]
61 60 auth = request.authorization
62 61 if not auth:
63 62 return write_response_error(HTTPForbidden)
64 63 return func(*fargs[1:], **fkwargs)
65 64
66 65
67 66 # views
68 67
69 68 def lfs_objects(request):
70 69 # indicate not supported, V1 API
71 70 log.warning('LFS: v1 api not supported, reporting it back to client')
72 71 return write_response_error(HTTPNotImplemented, 'LFS: v1 api not supported')
73 72
74 73
75 74 @AuthHeaderRequired()
76 75 def lfs_objects_batch(request):
77 76 """
78 77 The client sends the following information to the Batch endpoint to transfer some objects:
79 78
80 79 operation - Should be download or upload.
81 80 transfers - An optional Array of String identifiers for transfer
82 81 adapters that the client has configured. If omitted, the basic
83 82 transfer adapter MUST be assumed by the server.
84 83 objects - An Array of objects to download.
85 84 oid - String OID of the LFS object.
86 85 size - Integer byte size of the LFS object. Must be at least zero.
87 86 """
88 87 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
89 88 auth = request.authorization
90 89 repo = request.matchdict.get('repo')
91 90 data = request.json
92 91 operation = data.get('operation')
93 92 http_scheme = request.registry.git_lfs_http_scheme
94 93
95 94 if operation not in ('download', 'upload'):
96 95 log.debug('LFS: unsupported operation:%s', operation)
97 96 return write_response_error(
98 HTTPBadRequest, 'unsupported operation mode: `%s`' % operation)
97 HTTPBadRequest, f'unsupported operation mode: `{operation}`')
99 98
100 99 if 'objects' not in data:
101 100 log.debug('LFS: missing objects data')
102 101 return write_response_error(
103 102 HTTPBadRequest, 'missing objects data')
104 103
105 104 log.debug('LFS: handling operation of type: %s', operation)
106 105
107 106 objects = []
108 107 for o in data['objects']:
109 108 try:
110 109 oid = o['oid']
111 110 obj_size = o['size']
112 111 except KeyError:
113 112 log.exception('LFS, failed to extract data')
114 113 return write_response_error(
115 114 HTTPBadRequest, 'unsupported data in objects')
116 115
117 116 obj_data = {'oid': oid}
117 if http_scheme == 'http':
118 # Note(marcink): when using http, we might have a custom port
119 # so we skip setting it to http; url dispatch then won't generate a port in the URL
120 # for development we need this
121 http_scheme = None
118 122
119 obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
123 obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
120 124 _scheme=http_scheme)
121 125 obj_verify_href = request.route_url('lfs_objects_verify', repo=repo,
122 126 _scheme=http_scheme)
123 127 store = LFSOidStore(
124 128 oid, repo, store_location=request.registry.git_lfs_store_path)
125 129 handler = OidHandler(
126 130 store, repo, auth, oid, obj_size, obj_data,
127 131 obj_href, obj_verify_href)
128 132
129 133 # this verifies also OIDs
130 134 actions, errors = handler.exec_operation(operation)
131 135 if errors:
132 136 log.warning('LFS: got following errors: %s', errors)
133 137 obj_data['errors'] = errors
134 138
135 139 if actions:
136 140 obj_data['actions'] = actions
137 141
138 142 obj_data['size'] = obj_size
139 143 obj_data['authenticated'] = True
140 144 objects.append(obj_data)
141 145
142 146 result = {'objects': objects, 'transfer': 'basic'}
143 147 log.debug('LFS Response %s', safe_result(result))
144 148
145 149 return result
146 150
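For reference, the request and response shapes handled above, with illustrative values:

batch_request = {
    'operation': 'download',
    'objects': [{'oid': '1111aaaa', 'size': 123}],
}
# response: {'objects': [{'oid': '1111aaaa', 'size': 123, 'authenticated': True,
#                         'actions': {'download': {'href': '...', 'header': {...}}}}],
#            'transfer': 'basic'}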
147 151
148 152 def lfs_objects_oid_upload(request):
149 153 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
150 154 repo = request.matchdict.get('repo')
151 155 oid = request.matchdict.get('oid')
152 156 store = LFSOidStore(
153 157 oid, repo, store_location=request.registry.git_lfs_store_path)
154 158 engine = store.get_engine(mode='wb')
155 159 log.debug('LFS: starting chunked write of LFS oid: %s to storage', oid)
156 160
157 161 body = request.environ['wsgi.input']
158 162
159 163 with engine as f:
160 164 blksize = 64 * 1024 # 64kb
161 165 while True:
162 166 # read in chunks as stream comes in from Gunicorn
163 167 # this is a specific Gunicorn support function.
164 168 # might work differently on waitress
165 169 chunk = body.read(blksize)
166 170 if not chunk:
167 171 break
168 172 f.write(chunk)
169 173
170 174 return {'upload': 'ok'}
171 175
172 176
173 177 def lfs_objects_oid_download(request):
174 178 repo = request.matchdict.get('repo')
175 179 oid = request.matchdict.get('oid')
176 180
177 181 store = LFSOidStore(
178 182 oid, repo, store_location=request.registry.git_lfs_store_path)
179 183 if not store.has_oid():
180 184 log.debug('LFS: oid %s does not exist in store', oid)
181 185 return write_response_error(
182 HTTPNotFound, 'requested file with oid `%s` not found in store' % oid)
186 HTTPNotFound, f'requested file with oid `{oid}` not found in store')
183 187
184 188 # TODO(marcink): support range header ?
185 189 # Range: bytes=0-, `bytes=(\d+)\-.*`
186 190
187 191 f = open(store.oid_path, 'rb')
188 192 response = Response(
189 193 content_type='application/octet-stream', app_iter=FileIter(f))
190 194 response.headers.add('X-RC-LFS-Response-Oid', str(oid))
191 195 return response
192 196
193 197
194 198 def lfs_objects_verify(request):
195 199 request.response.content_type = GIT_LFS_CONTENT_TYPE + '+json'
196 200 repo = request.matchdict.get('repo')
197 201
198 202 data = request.json
199 203 oid = data.get('oid')
200 204 size = safe_int(data.get('size'))
201 205
202 206 if not (oid and size):
203 207 return write_response_error(
204 208 HTTPBadRequest, 'missing oid and size in request data')
205 209
206 210 store = LFSOidStore(
207 211 oid, repo, store_location=request.registry.git_lfs_store_path)
208 212 if not store.has_oid():
209 213 log.debug('LFS: oid %s does not exist in store', oid)
210 214 return write_response_error(
211 HTTPNotFound, 'oid `%s` does not exists in store' % oid)
215 HTTPNotFound, f'oid `{oid}` does not exist in store')
212 216
213 217 store_size = store.size_oid()
214 218 if store_size != size:
215 msg = 'requested file size mismatch store size:%s requested:%s' % (
219 msg = 'requested file size mismatch store size:{} requested:{}'.format(
216 220 store_size, size)
217 221 return write_response_error(
218 222 HTTPUnprocessableEntity, msg)
219 223
220 224 return {'message': {'size': 'ok', 'in_store': 'ok'}}
221 225
222 226
223 227 def lfs_objects_lock(request):
224 228 return write_response_error(
225 229 HTTPNotImplemented, 'GIT LFS locking api not supported')
226 230
227 231
228 232 def not_found(request):
229 233 return write_response_error(
230 234 HTTPNotFound, 'request path not found')
231 235
232 236
233 237 def lfs_disabled(request):
234 238 return write_response_error(
235 239 HTTPNotImplemented, 'GIT LFS disabled for this repo')
236 240
237 241
238 242 def git_lfs_app(config):
239 243
240 244 # v1 API deprecation endpoint
241 245 config.add_route('lfs_objects',
242 246 '/{repo:.*?[^/]}/info/lfs/objects')
243 247 config.add_view(lfs_objects, route_name='lfs_objects',
244 248 request_method='POST', renderer='json')
245 249
246 250 # locking API
247 251 config.add_route('lfs_objects_lock',
248 252 '/{repo:.*?[^/]}/info/lfs/locks')
249 253 config.add_view(lfs_objects_lock, route_name='lfs_objects_lock',
250 254 request_method=('POST', 'GET'), renderer='json')
251 255
252 256 config.add_route('lfs_objects_lock_verify',
253 257 '/{repo:.*?[^/]}/info/lfs/locks/verify')
254 258 config.add_view(lfs_objects_lock, route_name='lfs_objects_lock_verify',
255 259 request_method=('POST', 'GET'), renderer='json')
256 260
257 261 # batch API
258 262 config.add_route('lfs_objects_batch',
259 263 '/{repo:.*?[^/]}/info/lfs/objects/batch')
260 264 config.add_view(lfs_objects_batch, route_name='lfs_objects_batch',
261 265 request_method='POST', renderer='json')
262 266
263 267 # oid upload/download API
264 268 config.add_route('lfs_objects_oid',
265 269 '/{repo:.*?[^/]}/info/lfs/objects/{oid}')
266 270 config.add_view(lfs_objects_oid_upload, route_name='lfs_objects_oid',
267 271 request_method='PUT', renderer='json')
268 272 config.add_view(lfs_objects_oid_download, route_name='lfs_objects_oid',
269 273 request_method='GET', renderer='json')
270 274
271 275 # verification API
272 276 config.add_route('lfs_objects_verify',
273 277 '/{repo:.*?[^/]}/info/lfs/verify')
274 278 config.add_view(lfs_objects_verify, route_name='lfs_objects_verify',
275 279 request_method='POST', renderer='json')
276 280
277 281 # not found handler for API
278 282 config.add_notfound_view(not_found, renderer='json')
279 283
280 284
281 285 def create_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
282 286 config = Configurator()
283 287 if git_lfs_enabled:
284 288 config.include(git_lfs_app)
285 289 config.registry.git_lfs_store_path = git_lfs_store_path
286 290 config.registry.git_lfs_http_scheme = git_lfs_http_scheme
287 291 else:
288 292 # not found handler for API, reporting disabled LFS support
289 293 config.add_notfound_view(lfs_disabled, renderer='json')
290 294
291 295 app = config.make_wsgi_app()
292 296 return app
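
For reference, the factory above yields a standard WSGI application; a minimal sketch of serving it (store path and port are illustrative, not part of this change):

    from wsgiref.simple_server import make_server

    from vcsserver.git_lfs.app import create_app

    app = create_app(
        git_lfs_enabled=True,
        git_lfs_store_path='/tmp/lfs-store',  # illustrative location
        git_lfs_http_scheme='http')

    make_server('127.0.0.1', 8090, app).serve_forever()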
@@ -1,175 +1,177 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import shutil
20 20 import logging
21 21 from collections import OrderedDict
22 22
23 23 log = logging.getLogger(__name__)
24 24
25 25
26 class OidHandler(object):
26 class OidHandler:
27 27
28 28 def __init__(self, store, repo_name, auth, oid, obj_size, obj_data, obj_href,
29 29 obj_verify_href=None):
30 30 self.current_store = store
31 31 self.repo_name = repo_name
32 32 self.auth = auth
33 33 self.oid = oid
34 34 self.obj_size = obj_size
35 35 self.obj_data = obj_data
36 36 self.obj_href = obj_href
37 37 self.obj_verify_href = obj_verify_href
38 38
39 39 def get_store(self, mode=None):
40 40 return self.current_store
41 41
42 42 def get_auth(self):
43 43 """returns auth header for re-use in upload/download"""
44 44 return " ".join(self.auth)
45 45
46 46 def download(self):
47 47
48 48 store = self.get_store()
49 49 response = None
50 50 has_errors = None
51 51
52 52 if not store.has_oid():
53 53 # error reply back to the client that something is wrong with the download
54 err_msg = 'object: {} does not exist in store'.format(store.oid)
54 err_msg = f'object: {store.oid} does not exist in store'
55 55 has_errors = OrderedDict(
56 56 error=OrderedDict(
57 57 code=404,
58 58 message=err_msg
59 59 )
60 60 )
61 61
62 62 download_action = OrderedDict(
63 63 href=self.obj_href,
64 64 header=OrderedDict([("Authorization", self.get_auth())])
65 65 )
66 66 if not has_errors:
67 67 response = OrderedDict(download=download_action)
68 68 return response, has_errors
69 69
70 70 def upload(self, skip_existing=True):
71 71 """
72 72 Write upload action for git-lfs server
73 73 """
74 74
75 75 store = self.get_store()
76 76 response = None
77 77 has_errors = None
78 78
79 79 # check if we already have the OID; if we do, reply with an empty response
80 80 if store.has_oid():
81 81 log.debug('LFS: store already has oid %s', store.oid)
82 82
83 83 # validate size
84 84 store_size = store.size_oid()
85 85 size_match = store_size == self.obj_size
86 86 if not size_match:
87 87 log.warning(
88 88 'LFS: size mismatch for oid:%s, in store:%s expected: %s',
89 89 self.oid, store_size, self.obj_size)
90 90 elif skip_existing:
91 91 log.debug('LFS: skipping further action as oid is existing')
92 92 return response, has_errors
93 93
94 94 chunked = ("Transfer-Encoding", "chunked")
95 95 upload_action = OrderedDict(
96 96 href=self.obj_href,
97 97 header=OrderedDict([("Authorization", self.get_auth()), chunked])
98 98 )
99 99 if not has_errors:
100 100 response = OrderedDict(upload=upload_action)
101 101 # if specified in handler, return the verification endpoint
102 102 if self.obj_verify_href:
103 103 verify_action = OrderedDict(
104 104 href=self.obj_verify_href,
105 105 header=OrderedDict([("Authorization", self.get_auth())])
106 106 )
107 107 response['verify'] = verify_action
108 108 return response, has_errors
109 109
110 110 def exec_operation(self, operation, *args, **kwargs):
111 111 handler = getattr(self, operation)
112 112 log.debug('LFS: handling request using %s handler', handler)
113 113 return handler(*args, **kwargs)
114 114
115 115
116 class LFSOidStore(object):
116 class LFSOidStore:
117 117
118 118 def __init__(self, oid, repo, store_location=None):
119 119 self.oid = oid
120 120 self.repo = repo
121 self.store_path = store_location or self.get_default_store()
121 defined_store_path = store_location or self.get_default_store()
122 self.store_suffix = f"/objects/{oid[:2]}/{oid[2:4]}"
123 self.store_path = f"{defined_store_path.rstrip('/')}{self.store_suffix}"
122 124 self.tmp_oid_path = os.path.join(self.store_path, oid + '.tmp')
123 125 self.oid_path = os.path.join(self.store_path, oid)
124 126 self.fd = None
125 127
126 128 def get_engine(self, mode):
127 129 """
128 130 engine = .get_engine(mode='wb')
129 131 with engine as f:
130 132 f.write('...')
131 133 """
132 134
133 class StoreEngine(object):
135 class StoreEngine:
134 136 def __init__(self, mode, store_path, oid_path, tmp_oid_path):
135 137 self.mode = mode
136 138 self.store_path = store_path
137 139 self.oid_path = oid_path
138 140 self.tmp_oid_path = tmp_oid_path
139 141
140 142 def __enter__(self):
141 143 if not os.path.isdir(self.store_path):
142 144 os.makedirs(self.store_path)
143 145
144 146 # TODO(marcink): maybe write metadata here with size/oid ?
145 147 fd = open(self.tmp_oid_path, self.mode)
146 148 self.fd = fd
147 149 return fd
148 150
149 151 def __exit__(self, exc_type, exc_value, traceback):
150 152 # close tmp file, and rename to final destination
151 153 self.fd.close()
152 154 shutil.move(self.tmp_oid_path, self.oid_path)
153 155
154 156 return StoreEngine(
155 157 mode, self.store_path, self.oid_path, self.tmp_oid_path)
156 158
157 159 def get_default_store(self):
158 160 """
159 161 Default store, consistent with defaults of Mercurial large files store
160 162 which is /home/username/.cache/largefiles
161 163 """
162 164 user_home = os.path.expanduser("~")
163 165 return os.path.join(user_home, '.cache', 'lfs-store')
164 166
165 167 def has_oid(self):
166 168 return os.path.exists(os.path.join(self.store_path, self.oid))
167 169
168 170 def size_oid(self):
169 171 size = -1
170 172
171 173 if self.has_oid():
172 174 oid = os.path.join(self.store_path, self.oid)
173 175 size = os.stat(oid).st_size
174 176
175 177 return size
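
With the new store_suffix above, objects are sharded by the first characters of the oid instead of living flat under the store root. A short sketch of the resulting layout and of the tmp-write/rename protocol of get_engine() (oid and paths are illustrative):

    from vcsserver.git_lfs.lib import LFSOidStore

    store = LFSOidStore(oid='123456789', repo='test', store_location='/tmp/lfs-store')
    print(store.store_path)  # /tmp/lfs-store/objects/12/34
    print(store.oid_path)    # /tmp/lfs-store/objects/12/34/123456789

    with store.get_engine(mode='wb') as f:  # writes 123456789.tmp, renamed on exit
        f.write(b'CONTENT')

    assert store.has_oid() and store.size_oid() == 7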
@@ -1,16 +1,16 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -1,272 +1,274 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import pytest
20 20 from webtest.app import TestApp as WebObTestApp
21 import simplejson as json
22 21
22 from vcsserver.lib.rc_json import json
23 from vcsserver.str_utils import safe_bytes
23 24 from vcsserver.git_lfs.app import create_app
25 from vcsserver.git_lfs.lib import LFSOidStore
24 26
25 27
26 28 @pytest.fixture(scope='function')
27 29 def git_lfs_app(tmpdir):
28 30 custom_app = WebObTestApp(create_app(
29 31 git_lfs_enabled=True, git_lfs_store_path=str(tmpdir),
30 32 git_lfs_http_scheme='http'))
31 33 custom_app._store = str(tmpdir)
32 34 return custom_app
33 35
34 36
35 37 @pytest.fixture(scope='function')
36 38 def git_lfs_https_app(tmpdir):
37 39 custom_app = WebObTestApp(create_app(
38 40 git_lfs_enabled=True, git_lfs_store_path=str(tmpdir),
39 41 git_lfs_http_scheme='https'))
40 42 custom_app._store = str(tmpdir)
41 43 return custom_app
42 44
43 45
44 46 @pytest.fixture()
45 47 def http_auth():
46 48 return {'HTTP_AUTHORIZATION': "Basic XXXXX"}
47 49
48 50
49 class TestLFSApplication(object):
51 class TestLFSApplication:
50 52
51 53 def test_app_wrong_path(self, git_lfs_app):
52 54 git_lfs_app.get('/repo/info/lfs/xxx', status=404)
53 55
54 56 def test_app_deprecated_endpoint(self, git_lfs_app):
55 57 response = git_lfs_app.post('/repo/info/lfs/objects', status=501)
56 58 assert response.status_code == 501
57 assert json.loads(response.text) == {u'message': u'LFS: v1 api not supported'}
59 assert json.loads(response.text) == {'message': 'LFS: v1 api not supported'}
58 60
59 61 def test_app_lock_verify_api_not_available(self, git_lfs_app):
60 62 response = git_lfs_app.post('/repo/info/lfs/locks/verify', status=501)
61 63 assert response.status_code == 501
62 64 assert json.loads(response.text) == {
63 u'message': u'GIT LFS locking api not supported'}
65 'message': 'GIT LFS locking api not supported'}
64 66
65 67 def test_app_lock_api_not_available(self, git_lfs_app):
66 68 response = git_lfs_app.post('/repo/info/lfs/locks', status=501)
67 69 assert response.status_code == 501
68 70 assert json.loads(response.text) == {
69 u'message': u'GIT LFS locking api not supported'}
71 'message': 'GIT LFS locking api not supported'}
70 72
71 73 def test_app_batch_api_missing_auth(self, git_lfs_app):
72 74 git_lfs_app.post_json(
73 75 '/repo/info/lfs/objects/batch', params={}, status=403)
74 76
75 77 def test_app_batch_api_unsupported_operation(self, git_lfs_app, http_auth):
76 78 response = git_lfs_app.post_json(
77 79 '/repo/info/lfs/objects/batch', params={}, status=400,
78 80 extra_environ=http_auth)
79 81 assert json.loads(response.text) == {
80 u'message': u'unsupported operation mode: `None`'}
82 'message': 'unsupported operation mode: `None`'}
81 83
82 84 def test_app_batch_api_missing_objects(self, git_lfs_app, http_auth):
83 85 response = git_lfs_app.post_json(
84 86 '/repo/info/lfs/objects/batch', params={'operation': 'download'},
85 87 status=400, extra_environ=http_auth)
86 88 assert json.loads(response.text) == {
87 u'message': u'missing objects data'}
89 'message': 'missing objects data'}
88 90
89 91 def test_app_batch_api_unsupported_data_in_objects(
90 92 self, git_lfs_app, http_auth):
91 93 params = {'operation': 'download',
92 94 'objects': [{}]}
93 95 response = git_lfs_app.post_json(
94 96 '/repo/info/lfs/objects/batch', params=params, status=400,
95 97 extra_environ=http_auth)
96 98 assert json.loads(response.text) == {
97 u'message': u'unsupported data in objects'}
99 'message': 'unsupported data in objects'}
98 100
99 101 def test_app_batch_api_download_missing_object(
100 102 self, git_lfs_app, http_auth):
101 103 params = {'operation': 'download',
102 104 'objects': [{'oid': '123', 'size': '1024'}]}
103 105 response = git_lfs_app.post_json(
104 106 '/repo/info/lfs/objects/batch', params=params,
105 107 extra_environ=http_auth)
106 108
107 109 expected_objects = [
108 {u'authenticated': True,
109 u'errors': {u'error': {
110 u'code': 404,
111 u'message': u'object: 123 does not exist in store'}},
112 u'oid': u'123',
113 u'size': u'1024'}
110 {'authenticated': True,
111 'errors': {'error': {
112 'code': 404,
113 'message': 'object: 123 does not exist in store'}},
114 'oid': '123',
115 'size': '1024'}
114 116 ]
115 117 assert json.loads(response.text) == {
116 118 'objects': expected_objects, 'transfer': 'basic'}
117 119
118 120 def test_app_batch_api_download(self, git_lfs_app, http_auth):
119 121 oid = '456'
120 oid_path = os.path.join(git_lfs_app._store, oid)
122 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
121 123 if not os.path.isdir(os.path.dirname(oid_path)):
122 124 os.makedirs(os.path.dirname(oid_path))
123 125 with open(oid_path, 'wb') as f:
124 f.write('OID_CONTENT')
126 f.write(safe_bytes('OID_CONTENT'))
125 127
126 128 params = {'operation': 'download',
127 129 'objects': [{'oid': oid, 'size': '1024'}]}
128 130 response = git_lfs_app.post_json(
129 131 '/repo/info/lfs/objects/batch', params=params,
130 132 extra_environ=http_auth)
131 133
132 134 expected_objects = [
133 {u'authenticated': True,
134 u'actions': {
135 u'download': {
136 u'header': {u'Authorization': u'Basic XXXXX'},
137 u'href': u'http://localhost/repo/info/lfs/objects/456'},
135 {'authenticated': True,
136 'actions': {
137 'download': {
138 'header': {'Authorization': 'Basic XXXXX'},
139 'href': 'http://localhost/repo/info/lfs/objects/456'},
138 140 },
139 u'oid': u'456',
140 u'size': u'1024'}
141 'oid': '456',
142 'size': '1024'}
141 143 ]
142 144 assert json.loads(response.text) == {
143 145 'objects': expected_objects, 'transfer': 'basic'}
144 146
145 147 def test_app_batch_api_upload(self, git_lfs_app, http_auth):
146 148 params = {'operation': 'upload',
147 149 'objects': [{'oid': '123', 'size': '1024'}]}
148 150 response = git_lfs_app.post_json(
149 151 '/repo/info/lfs/objects/batch', params=params,
150 152 extra_environ=http_auth)
151 153 expected_objects = [
152 {u'authenticated': True,
153 u'actions': {
154 u'upload': {
155 u'header': {u'Authorization': u'Basic XXXXX',
156 u'Transfer-Encoding': u'chunked'},
157 u'href': u'http://localhost/repo/info/lfs/objects/123'},
158 u'verify': {
159 u'header': {u'Authorization': u'Basic XXXXX'},
160 u'href': u'http://localhost/repo/info/lfs/verify'}
154 {'authenticated': True,
155 'actions': {
156 'upload': {
157 'header': {'Authorization': 'Basic XXXXX',
158 'Transfer-Encoding': 'chunked'},
159 'href': 'http://localhost/repo/info/lfs/objects/123'},
160 'verify': {
161 'header': {'Authorization': 'Basic XXXXX'},
162 'href': 'http://localhost/repo/info/lfs/verify'}
161 163 },
162 u'oid': u'123',
163 u'size': u'1024'}
164 'oid': '123',
165 'size': '1024'}
164 166 ]
165 167 assert json.loads(response.text) == {
166 168 'objects': expected_objects, 'transfer': 'basic'}
167 169
168 170 def test_app_batch_api_upload_for_https(self, git_lfs_https_app, http_auth):
169 171 params = {'operation': 'upload',
170 172 'objects': [{'oid': '123', 'size': '1024'}]}
171 173 response = git_lfs_https_app.post_json(
172 174 '/repo/info/lfs/objects/batch', params=params,
173 175 extra_environ=http_auth)
174 176 expected_objects = [
175 {u'authenticated': True,
176 u'actions': {
177 u'upload': {
178 u'header': {u'Authorization': u'Basic XXXXX',
179 u'Transfer-Encoding': u'chunked'},
180 u'href': u'https://localhost/repo/info/lfs/objects/123'},
181 u'verify': {
182 u'header': {u'Authorization': u'Basic XXXXX'},
183 u'href': u'https://localhost/repo/info/lfs/verify'}
177 {'authenticated': True,
178 'actions': {
179 'upload': {
180 'header': {'Authorization': 'Basic XXXXX',
181 'Transfer-Encoding': 'chunked'},
182 'href': 'https://localhost/repo/info/lfs/objects/123'},
183 'verify': {
184 'header': {'Authorization': 'Basic XXXXX'},
185 'href': 'https://localhost/repo/info/lfs/verify'}
184 186 },
185 u'oid': u'123',
186 u'size': u'1024'}
187 'oid': '123',
188 'size': '1024'}
187 189 ]
188 190 assert json.loads(response.text) == {
189 191 'objects': expected_objects, 'transfer': 'basic'}
190 192
191 193 def test_app_verify_api_missing_data(self, git_lfs_app):
192 194 params = {'oid': 'missing'}
193 195 response = git_lfs_app.post_json(
194 196 '/repo/info/lfs/verify', params=params,
195 197 status=400)
196 198
197 199 assert json.loads(response.text) == {
198 u'message': u'missing oid and size in request data'}
200 'message': 'missing oid and size in request data'}
199 201
200 202 def test_app_verify_api_missing_obj(self, git_lfs_app):
201 203 params = {'oid': 'missing', 'size': '1024'}
202 204 response = git_lfs_app.post_json(
203 205 '/repo/info/lfs/verify', params=params,
204 206 status=404)
205 207
206 208 assert json.loads(response.text) == {
207 u'message': u'oid `missing` does not exist in store'}
209 'message': 'oid `missing` does not exist in store'}
208 210
209 211 def test_app_verify_api_size_mismatch(self, git_lfs_app):
210 212 oid = 'existing'
211 oid_path = os.path.join(git_lfs_app._store, oid)
213 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
212 214 if not os.path.isdir(os.path.dirname(oid_path)):
213 215 os.makedirs(os.path.dirname(oid_path))
214 216 with open(oid_path, 'wb') as f:
215 f.write('OID_CONTENT')
217 f.write(safe_bytes('OID_CONTENT'))
216 218
217 219 params = {'oid': oid, 'size': '1024'}
218 220 response = git_lfs_app.post_json(
219 221 '/repo/info/lfs/verify', params=params, status=422)
220 222
221 223 assert json.loads(response.text) == {
222 u'message': u'requested file size mismatch '
223 u'store size:11 requested:1024'}
224 'message': 'requested file size mismatch '
225 'store size:11 requested:1024'}
224 226
225 227 def test_app_verify_api(self, git_lfs_app):
226 228 oid = 'existing'
227 oid_path = os.path.join(git_lfs_app._store, oid)
229 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
228 230 if not os.path.isdir(os.path.dirname(oid_path)):
229 231 os.makedirs(os.path.dirname(oid_path))
230 232 with open(oid_path, 'wb') as f:
231 f.write('OID_CONTENT')
233 f.write(safe_bytes('OID_CONTENT'))
232 234
233 235 params = {'oid': oid, 'size': 11}
234 236 response = git_lfs_app.post_json(
235 237 '/repo/info/lfs/verify', params=params)
236 238
237 239 assert json.loads(response.text) == {
238 u'message': {u'size': u'ok', u'in_store': u'ok'}}
240 'message': {'size': 'ok', 'in_store': 'ok'}}
239 241
240 242 def test_app_download_api_oid_not_existing(self, git_lfs_app):
241 243 oid = 'missing'
242 244
243 245 response = git_lfs_app.get(
244 246 '/repo/info/lfs/objects/{oid}'.format(oid=oid), status=404)
245 247
246 248 assert json.loads(response.text) == {
247 u'message': u'requested file with oid `missing` not found in store'}
249 'message': 'requested file with oid `missing` not found in store'}
248 250
249 251 def test_app_download_api(self, git_lfs_app):
250 252 oid = 'existing'
251 oid_path = os.path.join(git_lfs_app._store, oid)
253 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
252 254 if not os.path.isdir(os.path.dirname(oid_path)):
253 255 os.makedirs(os.path.dirname(oid_path))
254 256 with open(oid_path, 'wb') as f:
255 f.write('OID_CONTENT')
257 f.write(safe_bytes('OID_CONTENT'))
256 258
257 259 response = git_lfs_app.get(
258 260 '/repo/info/lfs/objects/{oid}'.format(oid=oid))
259 261 assert response
260 262
261 263 def test_app_upload(self, git_lfs_app):
262 264 oid = 'uploaded'
263 265
264 266 response = git_lfs_app.put(
265 267 '/repo/info/lfs/objects/{oid}'.format(oid=oid), params='CONTENT')
266 268
267 assert json.loads(response.text) == {u'upload': u'ok'}
269 assert json.loads(response.text) == {'upload': 'ok'}
268 270
269 271 # verify that we actually wrote that OID
270 oid_path = os.path.join(git_lfs_app._store, oid)
272 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
271 273 assert os.path.isfile(oid_path)
272 274 assert 'CONTENT' == open(oid_path).read()
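
The seeding pattern repeated in the tests above (resolve the sharded oid_path via LFSOidStore, create the parent directories, write bytes) could be folded into a small fixture helper; a sketch, not part of this change:

    import os
    from vcsserver.git_lfs.lib import LFSOidStore

    def write_oid(store_location, oid, content=b'OID_CONTENT'):
        # seed an LFS object at its sharded location for test fixtures
        oid_path = LFSOidStore(oid=oid, repo=None, store_location=store_location).oid_path
        os.makedirs(os.path.dirname(oid_path), exist_ok=True)
        with open(oid_path, 'wb') as f:
            f.write(content)
        return oid_path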
@@ -1,141 +1,142 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import pytest
20 from vcsserver.str_utils import safe_bytes
20 21 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
21 22
22 23
23 24 @pytest.fixture()
24 25 def lfs_store(tmpdir):
25 26 repo = 'test'
26 27 oid = '123456789'
27 28 store = LFSOidStore(oid=oid, repo=repo, store_location=str(tmpdir))
28 29 return store
29 30
30 31
31 32 @pytest.fixture()
32 33 def oid_handler(lfs_store):
33 34 store = lfs_store
34 35 repo = store.repo
35 36 oid = store.oid
36 37
37 38 oid_handler = OidHandler(
38 39 store=store, repo_name=repo, auth=('basic', 'xxxx'),
39 40 oid=oid,
40 41 obj_size='1024', obj_data={}, obj_href='http://localhost/handle_oid',
41 42 obj_verify_href='http://localhost/verify')
42 43 return oid_handler
43 44
44 45
45 class TestOidHandler(object):
46 class TestOidHandler:
46 47
47 48 @pytest.mark.parametrize('exec_action', [
48 49 'download',
49 50 'upload',
50 51 ])
51 52 def test_exec_action(self, exec_action, oid_handler):
52 53 handler = oid_handler.exec_operation(exec_action)
53 54 assert handler
54 55
55 56 def test_exec_action_undefined(self, oid_handler):
56 57 with pytest.raises(AttributeError):
57 58 oid_handler.exec_operation('wrong')
58 59
59 60 def test_download_oid_not_existing(self, oid_handler):
60 61 response, has_errors = oid_handler.exec_operation('download')
61 62
62 63 assert response is None
63 64 assert has_errors['error'] == {
64 65 'code': 404,
65 66 'message': 'object: 123456789 does not exist in store'}
66 67
67 68 def test_download_oid(self, oid_handler):
68 69 store = oid_handler.get_store()
69 70 if not os.path.isdir(os.path.dirname(store.oid_path)):
70 71 os.makedirs(os.path.dirname(store.oid_path))
71 72
72 73 with open(store.oid_path, 'wb') as f:
73 f.write('CONTENT')
74 f.write(safe_bytes('CONTENT'))
74 75
75 76 response, has_errors = oid_handler.exec_operation('download')
76 77
77 78 assert has_errors is None
78 79 assert response['download'] == {
79 80 'header': {'Authorization': 'basic xxxx'},
80 81 'href': 'http://localhost/handle_oid'
81 82 }
82 83
83 84 def test_upload_oid_that_exists(self, oid_handler):
84 85 store = oid_handler.get_store()
85 86 if not os.path.isdir(os.path.dirname(store.oid_path)):
86 87 os.makedirs(os.path.dirname(store.oid_path))
87 88
88 89 with open(store.oid_path, 'wb') as f:
89 f.write('CONTENT')
90 f.write(safe_bytes('CONTENT'))
90 91 oid_handler.obj_size = 7
91 92 response, has_errors = oid_handler.exec_operation('upload')
92 93 assert has_errors is None
93 94 assert response is None
94 95
95 96 def test_upload_oid_that_exists_but_has_wrong_size(self, oid_handler):
96 97 store = oid_handler.get_store()
97 98 if not os.path.isdir(os.path.dirname(store.oid_path)):
98 99 os.makedirs(os.path.dirname(store.oid_path))
99 100
100 101 with open(store.oid_path, 'wb') as f:
101 f.write('CONTENT')
102 f.write(safe_bytes('CONTENT'))
102 103
103 104 oid_handler.obj_size = 10240
104 105 response, has_errors = oid_handler.exec_operation('upload')
105 106 assert has_errors is None
106 107 assert response['upload'] == {
107 108 'header': {'Authorization': 'basic xxxx',
108 109 'Transfer-Encoding': 'chunked'},
109 110 'href': 'http://localhost/handle_oid',
110 111 }
111 112
112 113 def test_upload_oid(self, oid_handler):
113 114 response, has_errors = oid_handler.exec_operation('upload')
114 115 assert has_errors is None
115 116 assert response['upload'] == {
116 117 'header': {'Authorization': 'basic xxxx',
117 118 'Transfer-Encoding': 'chunked'},
118 119 'href': 'http://localhost/handle_oid'
119 120 }
120 121
121 122
122 class TestLFSStore(object):
123 class TestLFSStore:
123 124 def test_write_oid(self, lfs_store):
124 125 oid_location = lfs_store.oid_path
125 126
126 127 assert not os.path.isfile(oid_location)
127 128
128 129 engine = lfs_store.get_engine(mode='wb')
129 130 with engine as f:
130 f.write('CONTENT')
131 f.write(safe_bytes('CONTENT'))
131 132
132 133 assert os.path.isfile(oid_location)
133 134
134 135 def test_detect_has_oid(self, lfs_store):
135 136
136 137 assert lfs_store.has_oid() is False
137 138 engine = lfs_store.get_engine(mode='wb')
138 139 with engine as f:
139 f.write('CONTENT')
140 f.write(safe_bytes('CONTENT'))
140 141
141 assert lfs_store.has_oid() is True No newline at end of file
142 assert lfs_store.has_oid() is True
@@ -1,50 +1,50 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import copy
18 18 from functools import wraps
19 19
20 20
21 21 def get_cython_compat_decorator(wrapper, func):
22 22 """
23 23 Creates a cython compatible decorator. The previously used
24 24 decorator.decorator() function seems to be incompatible with cython.
25 25
26 26 :param wrapper: __wrapper method of the decorator class
27 27 :param func: decorated function
28 28 """
29 29 @wraps(func)
30 30 def local_wrapper(*args, **kwds):
31 31 return wrapper(func, *args, **kwds)
32 32 local_wrapper.__wrapped__ = func
33 33 return local_wrapper
34 34
35 35
36 36 def safe_result(result):
37 37 """clean result for better representation in logs"""
38 38 clean_copy = copy.deepcopy(result)
39 39
40 40 try:
41 41 if 'objects' in clean_copy:
42 42 for oid_data in clean_copy['objects']:
43 43 if 'actions' in oid_data:
44 44 for action_name, data in oid_data['actions'].items():
45 45 if 'header' in data:
46 46 data['header'] = {'Authorization': '*****'}
47 47 except Exception:
48 48 return result
49 49
50 50 return clean_copy
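
A quick illustration of safe_result() as defined above, on a made-up batch payload; the deep copy leaves the original untouched while the returned copy masks the Authorization header:

    result = {'objects': [
        {'oid': '123',
         'actions': {'download': {
             'href': 'http://localhost/repo/info/lfs/objects/123',
             'header': {'Authorization': 'Basic c2VjcmV0'}}}},
    ]}

    cleaned = safe_result(result)
    print(cleaned['objects'][0]['actions']['download']['header'])
    # {'Authorization': '*****'}
    print(result['objects'][0]['actions']['download']['header'])
    # {'Authorization': 'Basic c2VjcmV0'}  -- original is untouched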
@@ -1,79 +1,92 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """
19 19 Mercurial libs compatibility
20 20 """
21 21
22 22 import mercurial
23 23 from mercurial import demandimport
24
24 25 # patch demandimport, due to a bug in mercurial where it always triggers
25 26 # demandimport.enable()
27 from vcsserver.str_utils import safe_bytes
28
26 29 demandimport.enable = lambda *args, **kwargs: 1
27 30
28 31 from mercurial import ui
29 32 from mercurial import patch
30 33 from mercurial import config
31 34 from mercurial import extensions
32 35 from mercurial import scmutil
33 36 from mercurial import archival
34 37 from mercurial import discovery
35 38 from mercurial import unionrepo
36 39 from mercurial import localrepo
37 40 from mercurial import merge as hg_merge
38 41 from mercurial import subrepo
39 42 from mercurial import subrepoutil
40 43 from mercurial import tags as hg_tag
41 44 from mercurial import util as hgutil
42 from mercurial.commands import clone, nullid, pull
45 from mercurial.commands import clone, pull
46 from mercurial.node import nullid
43 47 from mercurial.context import memctx, memfilectx
44 48 from mercurial.error import (
45 49 LookupError, RepoError, RepoLookupError, Abort, InterventionRequired,
46 50 RequirementError, ProgrammingError)
47 51 from mercurial.hgweb import hgweb_mod
48 52 from mercurial.localrepo import instance
49 53 from mercurial.match import match, alwaysmatcher, patternmatcher
50 54 from mercurial.mdiff import diffopts
51 55 from mercurial.node import bin, hex
52 56 from mercurial.encoding import tolocal
53 57 from mercurial.discovery import findcommonoutgoing
54 58 from mercurial.hg import peer
55 59 from mercurial.httppeer import makepeer
56 from mercurial.util import url as hg_url
60 from mercurial.utils.urlutil import url as hg_url
57 61 from mercurial.scmutil import revrange, revsymbol
58 62 from mercurial.node import nullrev
59 63 from mercurial import exchange
60 64 from hgext import largefiles
61 65
62 66 # these auth handlers are patched for a python 2.6.5 bug causing
63 67 # infinite looping when given invalid resources
64 68 from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
65 69
70 # hg strip is in core now
71 from mercurial import strip as hgext_strip
72
66 73
67 74 def get_ctx(repo, ref):
75 if not isinstance(ref, int):
76 ref = safe_bytes(ref)
77
68 78 try:
69 79 ctx = repo[ref]
80 return ctx
70 81 except (ProgrammingError, TypeError):
71 82 # we're unable to find the rev using a regular lookup, we fallback
72 83 # to slower, but backward compat revsymbol usage
73 ctx = revsymbol(repo, ref)
84 pass
74 85 except (LookupError, RepoLookupError):
75 86 # Similar case as above but only for refs that are not numeric
76 if isinstance(ref, (int, long)):
87 if isinstance(ref, int):
77 88 raise
78 ctx = revsymbol(repo, ref)
89
90 ctx = revsymbol(repo, ref)
91
79 92 return ctx
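
The revised get_ctx() above tries the direct repo[ref] lookup first and only falls back to the slower revsymbol() for refs that are not numeric. A stub-based sketch of that control flow (no real Mercurial repo involved; all names illustrative):

    class _StubRepo:
        # hypothetical repo that cannot resolve anything directly
        def __getitem__(self, ref):
            raise LookupError(ref)

    def lookup(repo, ref, revsymbol):
        try:
            return repo[ref]
        except LookupError:
            if isinstance(ref, int):
                raise  # numeric revisions should resolve directly
        return revsymbol(repo, ref)

    print(lookup(_StubRepo(), b'stable', lambda r, ref: f'ctx for {ref!r}'))
    # ctx for b'stable'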
@@ -1,134 +1,134 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """
19 19 Adjustments to Mercurial
20 20
21 21 Intentionally kept separate from `hgcompat` and `hg`, so that these patches can
22 22 be applied without having to import the whole Mercurial machinery.
23 23
24 24 Imports are function local, so that just importing this module does not cause
25 25 side-effects other than these functions being defined.
26 26 """
27 27
28 28 import logging
29 29
30 30
31 31 def patch_largefiles_capabilities():
32 32 """
33 33 Patches the capabilities function in the largefiles extension.
34 34 """
35 35 from vcsserver import hgcompat
36 36 lfproto = hgcompat.largefiles.proto
37 37 wrapper = _dynamic_capabilities_wrapper(
38 38 lfproto, hgcompat.extensions.extensions)
39 39 lfproto._capabilities = wrapper
40 40
41 41
42 42 def _dynamic_capabilities_wrapper(lfproto, extensions):
43 43
44 44 wrapped_capabilities = lfproto._capabilities
45 45 logger = logging.getLogger('vcsserver.hg')
46 46
47 47 def _dynamic_capabilities(orig, repo, proto):
48 48 """
49 49 Adds dynamic behavior, so that the capability is only added if the
50 50 extension is enabled in the current ui object.
51 51 """
52 52 if 'largefiles' in dict(extensions(repo.ui)):
53 53 logger.debug('Extension largefiles enabled')
54 54 calc_capabilities = wrapped_capabilities
55 55 return calc_capabilities(orig, repo, proto)
56 56 else:
57 57 logger.debug('Extension largefiles disabled')
58 58 return orig(repo, proto)
59 59
60 60 return _dynamic_capabilities
61 61
62 62
63 63 def patch_subrepo_type_mapping():
64 64 from collections import defaultdict
65 from hgcompat import subrepo, subrepoutil
65 from .hgcompat import subrepo, subrepoutil
66 66 from vcsserver.exceptions import SubrepoMergeException
67 67
68 68 class NoOpSubrepo(subrepo.abstractsubrepo):
69 69
70 70 def __init__(self, ctx, path, *args, **kwargs):
71 71 """Initialize abstractsubrepo part
72 72
73 73 ``ctx`` is the context referring this subrepository in the
74 74 parent repository.
75 75
76 76 ``path`` is the path to this subrepository as seen from
77 77 innermost repository.
78 78 """
79 79 self.ui = ctx.repo().ui
80 80 self._ctx = ctx
81 81 self._path = path
82 82
83 83 def storeclean(self, path):
84 84 """
85 85 returns true if the repository has not changed since it was last
86 86 cloned from or pushed to a given repository.
87 87 """
88 88 return True
89 89
90 90 def dirty(self, ignoreupdate=False, missing=False):
91 91 """returns true if the dirstate of the subrepo is dirty or does not
92 92 match current stored state. If ignoreupdate is true, only check
93 93 whether the subrepo has uncommitted changes in its dirstate.
94 94 """
95 95 return False
96 96
97 97 def basestate(self):
98 98 """current working directory base state, disregarding .hgsubstate
99 99 state and working directory modifications"""
100 100 substate = subrepoutil.state(self._ctx, self.ui)
101 101 file_system_path, rev, repotype = substate.get(self._path)
102 102 return rev
103 103
104 104 def remove(self):
105 105 """remove the subrepo
106 106
107 107 (should verify the dirstate is not dirty first)
108 108 """
109 109 pass
110 110
111 111 def get(self, state, overwrite=False):
112 112 """run whatever commands are needed to put the subrepo into
113 113 this state
114 114 """
115 115 pass
116 116
117 117 def merge(self, state):
118 118 """merge currently-saved state with the new state."""
119 119 raise SubrepoMergeException()()
120 120
121 121 def push(self, opts):
122 122 """perform whatever action is analogous to 'hg push'
123 123
124 124 This may be a no-op on some systems.
125 125 """
126 126 pass
127 127
128 128 # Patch subrepo type mapping to always return our NoOpSubrepo class
129 129 # whenever a subrepo class is looked up.
130 130 subrepo.types = {
131 131 'hg': NoOpSubrepo,
132 132 'git': NoOpSubrepo,
133 133 'svn': NoOpSubrepo
134 134 }
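
The capabilities patch above follows a reusable shape: keep a reference to the wrapped implementation and delegate to it only while the extension is active, otherwise call the original. A standalone sketch of that idea (all names illustrative):

    def make_dynamic(wrapped, orig, extension_enabled):
        def _dynamic(repo, proto):
            if extension_enabled(repo):
                return wrapped(orig, repo, proto)
            return orig(repo, proto)
        return _dynamic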
@@ -1,205 +1,220 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
8 6 # the Free Software Foundation; either version 3 of the License, or
9 7 # (at your option) any later version.
10 8 #
11 9 # This program is distributed in the hope that it will be useful,
12 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 12 # GNU General Public License for more details.
15 13 #
16 14 # You should have received a copy of the GNU General Public License
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20 18 import re
21 19 import os
22 20 import sys
23 21 import datetime
24 22 import logging
25 23 import pkg_resources
26 24
27 25 import vcsserver
26 from vcsserver.str_utils import safe_bytes
28 27
29 28 log = logging.getLogger(__name__)
30 29
30 HOOKS_DIR_MODE = 0o755
31 HOOKS_FILE_MODE = 0o755
32
33
34 def set_permissions_if_needed(path_to_check, perms: int):
35 # Get current permissions
36 current_permissions = os.stat(path_to_check).st_mode & 0o777 # Extract permission bits
37
38 # Check if current permissions are lower than required
39 if current_permissions < int(perms):
40 # Change the permissions if they are lower than required
41 os.chmod(path_to_check, perms)
42
31 43
32 44 def get_git_hooks_path(repo_path, bare):
33 45 hooks_path = os.path.join(repo_path, 'hooks')
34 46 if not bare:
35 47 hooks_path = os.path.join(repo_path, '.git', 'hooks')
36 48
37 49 return hooks_path
38 50
39 51
40 52 def install_git_hooks(repo_path, bare, executable=None, force_create=False):
41 53 """
42 54 Creates a RhodeCode hook inside a git repository
43 55
44 56 :param repo_path: path to repository
57 :param bare: defines if repository is considered a bare git repo
45 58 :param executable: binary executable to put in the hooks
46 :param force_create: Create even if same name hook exists
59 :param force_create: Create the hook even if one with the same name exists
47 60 """
48 61 executable = executable or sys.executable
49 62 hooks_path = get_git_hooks_path(repo_path, bare)
50 63
51 if not os.path.isdir(hooks_path):
52 os.makedirs(hooks_path, mode=0o777)
64 # we always call it to ensure dir exists and it has a proper mode
65 if not os.path.exists(hooks_path):
66 # If it doesn't exist, create a new directory with the specified mode
67 os.makedirs(hooks_path, mode=HOOKS_DIR_MODE, exist_ok=True)
68 # If it exists, change the directory's mode to the specified mode
69 set_permissions_if_needed(hooks_path, perms=HOOKS_DIR_MODE)
53 70
54 71 tmpl_post = pkg_resources.resource_string(
55 72 'vcsserver', '/'.join(
56 73 ('hook_utils', 'hook_templates', 'git_post_receive.py.tmpl')))
57 74 tmpl_pre = pkg_resources.resource_string(
58 75 'vcsserver', '/'.join(
59 76 ('hook_utils', 'hook_templates', 'git_pre_receive.py.tmpl')))
60 77
61 78 path = '' # not used for now
62 79 timestamp = datetime.datetime.utcnow().isoformat()
63 80
64 81 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
65 82 log.debug('Installing git hook in repo %s', repo_path)
66 _hook_file = os.path.join(hooks_path, '%s-receive' % h_type)
83 _hook_file = os.path.join(hooks_path, f'{h_type}-receive')
67 84 _rhodecode_hook = check_rhodecode_hook(_hook_file)
68 85
69 86 if _rhodecode_hook or force_create:
70 87 log.debug('writing git %s hook file at %s !', h_type, _hook_file)
71 88 try:
72 89 with open(_hook_file, 'wb') as f:
73 template = template.replace(
74 '_TMPL_', vcsserver.__version__)
75 template = template.replace('_DATE_', timestamp)
76 template = template.replace('_ENV_', executable)
77 template = template.replace('_PATH_', path)
90 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
91 template = template.replace(b'_DATE_', safe_bytes(timestamp))
92 template = template.replace(b'_ENV_', safe_bytes(executable))
93 template = template.replace(b'_PATH_', safe_bytes(path))
78 94 f.write(template)
79 os.chmod(_hook_file, 0o755)
80 except IOError:
95 set_permissions_if_needed(_hook_file, perms=HOOKS_FILE_MODE)
96 except OSError:
81 97 log.exception('error writing hook file %s', _hook_file)
82 98 else:
83 99 log.debug('skipping writing hook file')
84 100
85 101 return True
86 102
87 103
88 104 def get_svn_hooks_path(repo_path):
89 105 hooks_path = os.path.join(repo_path, 'hooks')
90 106
91 107 return hooks_path
92 108
93 109
94 110 def install_svn_hooks(repo_path, executable=None, force_create=False):
95 111 """
96 112 Creates RhodeCode hooks inside a svn repository
97 113
98 114 :param repo_path: path to repository
99 115 :param executable: binary executable to put in the hooks
100 116 :param force_create: Create the hook even if one with the same name exists
101 117 """
102 118 executable = executable or sys.executable
103 119 hooks_path = get_svn_hooks_path(repo_path)
104 120 if not os.path.isdir(hooks_path):
105 os.makedirs(hooks_path, mode=0o777)
121 os.makedirs(hooks_path, mode=0o777, exist_ok=True)
106 122
107 123 tmpl_post = pkg_resources.resource_string(
108 124 'vcsserver', '/'.join(
109 125 ('hook_utils', 'hook_templates', 'svn_post_commit_hook.py.tmpl')))
110 126 tmpl_pre = pkg_resources.resource_string(
111 127 'vcsserver', '/'.join(
112 128 ('hook_utils', 'hook_templates', 'svn_pre_commit_hook.py.tmpl')))
113 129
114 130 path = '' # not used for now
115 131 timestamp = datetime.datetime.utcnow().isoformat()
116 132
117 133 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
118 134 log.debug('Installing svn hook in repo %s', repo_path)
119 _hook_file = os.path.join(hooks_path, '%s-commit' % h_type)
135 _hook_file = os.path.join(hooks_path, f'{h_type}-commit')
120 136 _rhodecode_hook = check_rhodecode_hook(_hook_file)
121 137
122 138 if _rhodecode_hook or force_create:
123 139 log.debug('writing svn %s hook file at %s !', h_type, _hook_file)
124 140
125 141 try:
126 142 with open(_hook_file, 'wb') as f:
127 template = template.replace(
128 '_TMPL_', vcsserver.__version__)
129 template = template.replace('_DATE_', timestamp)
130 template = template.replace('_ENV_', executable)
131 template = template.replace('_PATH_', path)
143 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
144 template = template.replace(b'_DATE_', safe_bytes(timestamp))
145 template = template.replace(b'_ENV_', safe_bytes(executable))
146 template = template.replace(b'_PATH_', safe_bytes(path))
132 147
133 148 f.write(template)
134 149 os.chmod(_hook_file, 0o755)
135 except IOError:
150 except OSError:
136 151 log.exception('error writing hook file %s', _hook_file)
137 152 else:
138 153 log.debug('skipping writing hook file')
139 154
140 155 return True
141 156
142 157
143 158 def get_version_from_hook(hook_path):
144 version = ''
159 version = b''
145 160 hook_content = read_hook_content(hook_path)
146 matches = re.search(r'(?:RC_HOOK_VER)\s*=\s*(.*)', hook_content)
161 matches = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', hook_content)
147 162 if matches:
148 163 try:
149 164 version = matches.groups()[0]
150 165 log.debug('got version %s from hooks.', version)
151 166 except Exception:
152 167 log.exception("Exception while reading the hook version.")
153 return version.replace("'", "")
168 return version.replace(b"'", b"")
154 169
155 170
156 171 def check_rhodecode_hook(hook_path):
157 172 """
158 173 Check if the hook was created by RhodeCode
159 174 """
160 175 if not os.path.exists(hook_path):
161 176 return True
162 177
163 178 log.debug('hook exists, checking if it is from RhodeCode')
164 179
165 180 version = get_version_from_hook(hook_path)
166 181 if version:
167 182 return True
168 183
169 184 return False
170 185
171 186
172 def read_hook_content(hook_path):
173 content = ''
187 def read_hook_content(hook_path) -> bytes:
188 content = b''
174 189 if os.path.isfile(hook_path):
175 190 with open(hook_path, 'rb') as f:
176 191 content = f.read()
177 192 return content
178 193
179 194
180 195 def get_git_pre_hook_version(repo_path, bare):
181 196 hooks_path = get_git_hooks_path(repo_path, bare)
182 197 _hook_file = os.path.join(hooks_path, 'pre-receive')
183 198 version = get_version_from_hook(_hook_file)
184 199 return version
185 200
186 201
187 202 def get_git_post_hook_version(repo_path, bare):
188 203 hooks_path = get_git_hooks_path(repo_path, bare)
189 204 _hook_file = os.path.join(hooks_path, 'post-receive')
190 205 version = get_version_from_hook(_hook_file)
191 206 return version
192 207
193 208
194 209 def get_svn_pre_hook_version(repo_path):
195 210 hooks_path = get_svn_hooks_path(repo_path)
196 211 _hook_file = os.path.join(hooks_path, 'pre-commit')
197 212 version = get_version_from_hook(_hook_file)
198 213 return version
199 214
200 215
201 216 def get_svn_post_hook_version(repo_path):
202 217 hooks_path = get_svn_hooks_path(repo_path)
203 218 _hook_file = os.path.join(hooks_path, 'post-commit')
204 219 version = get_version_from_hook(_hook_file)
205 220 return version
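
Since the hook templates are now read as bytes, every placeholder substitution has to stay in bytes as well. A condensed sketch of the substitution step used above (template content and version string are illustrative):

    import datetime
    import sys

    template = b"#!_ENV_\n# TIMESTAMP: _DATE_\nRC_HOOK_VER = '_TMPL_'\n"

    replacements = {
        b'_TMPL_': b'5.0.0',  # illustrative version string
        b'_DATE_': datetime.datetime.utcnow().isoformat().encode(),
        b'_ENV_': sys.executable.encode(),
        b'_PATH_': b'',
    }
    for marker, value in replacements.items():
        template = template.replace(marker, value)
    print(template.decode())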
@@ -1,51 +1,51 b''
1 1 #!_ENV_
2 2 import os
3 3 import sys
4 4 path_adjust = [_PATH_]
5 5
6 6 if path_adjust:
7 7 sys.path = path_adjust
8 8
9 9 try:
10 10 from vcsserver import hooks
11 11 except ImportError:
12 12 if os.environ.get('RC_DEBUG_GIT_HOOK'):
13 13 import traceback
14 print traceback.format_exc()
14 print(traceback.format_exc())
15 15 hooks = None
16 16
17 17
18 18 # TIMESTAMP: _DATE_
19 19 RC_HOOK_VER = '_TMPL_'
20 20
21 21
22 22 def main():
23 23 if hooks is None:
24 24 # exit with success if we cannot import vcsserver.hooks !!
25 25 # this allows simply push to this repo even without rhodecode
26 26 sys.exit(0)
27 27
28 28 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_GIT_HOOKS'):
29 29 sys.exit(0)
30 30
31 31 repo_path = os.getcwd()
32 32 push_data = sys.stdin.readlines()
33 33 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
34 34 # os.environ is modified here by a subprocess call that
35 35 # runs git and later git executes this hook.
36 36 # Environ gets some additional info from rhodecode system
37 37 # like IP or username from basic-auth
38 38 try:
39 39 result = hooks.git_post_receive(repo_path, push_data, os.environ)
40 40 sys.exit(result)
41 41 except Exception as error:
42 42 # TODO: johbo: Improve handling of this special case
43 43 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
44 44 raise
45 print 'ERROR:', error
45 print(f'ERROR: {error}')
46 46 sys.exit(1)
47 47 sys.exit(0)
48 48
49 49
50 50 if __name__ == '__main__':
51 51 main()
@@ -1,51 +1,51 b''
1 1 #!_ENV_
2 2 import os
3 3 import sys
4 4 path_adjust = [_PATH_]
5 5
6 6 if path_adjust:
7 7 sys.path = path_adjust
8 8
9 9 try:
10 10 from vcsserver import hooks
11 11 except ImportError:
12 12 if os.environ.get('RC_DEBUG_GIT_HOOK'):
13 13 import traceback
14 print traceback.format_exc()
14 print(traceback.format_exc())
15 15 hooks = None
16 16
17 17
18 18 # TIMESTAMP: _DATE_
19 19 RC_HOOK_VER = '_TMPL_'
20 20
21 21
22 22 def main():
23 23 if hooks is None:
24 24 # exit with success if we cannot import vcsserver.hooks !!
25 25 # this allows simply push to this repo even without rhodecode
26 26 sys.exit(0)
27 27
28 28 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_GIT_HOOKS'):
29 29 sys.exit(0)
30 30
31 31 repo_path = os.getcwd()
32 32 push_data = sys.stdin.readlines()
33 33 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
34 34 # os.environ is modified here by a subprocess call that
35 35 # runs git and later git executes this hook.
36 36 # Environ gets some additional info from rhodecode system
37 37 # like IP or username from basic-auth
38 38 try:
39 39 result = hooks.git_pre_receive(repo_path, push_data, os.environ)
40 40 sys.exit(result)
41 41 except Exception as error:
42 42 # TODO: johbo: Improve handling of this special case
43 43 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
44 44 raise
45 print 'ERROR:', error
45 print(f'ERROR: {error}')
46 46 sys.exit(1)
47 47 sys.exit(0)
48 48
49 49
50 50 if __name__ == '__main__':
51 51 main()
@@ -1,50 +1,50 b''
1 1 #!_ENV_
2 2
3 3 import os
4 4 import sys
5 5 path_adjust = [_PATH_]
6 6
7 7 if path_adjust:
8 8 sys.path = path_adjust
9 9
10 10 try:
11 11 from vcsserver import hooks
12 12 except ImportError:
13 13 if os.environ.get('RC_DEBUG_SVN_HOOK'):
14 14 import traceback
15 print traceback.format_exc()
15 print(traceback.format_exc())
16 16 hooks = None
17 17
18 18
19 19 # TIMESTAMP: _DATE_
20 20 RC_HOOK_VER = '_TMPL_'
21 21
22 22
23 23 def main():
24 24 if hooks is None:
25 25 # exit with success if we cannot import vcsserver.hooks !!
26 26 # this allows pushing to this repo even without rhodecode
27 27 sys.exit(0)
28 28
29 29 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
30 30 sys.exit(0)
31 31 repo_path = os.getcwd()
32 32 push_data = sys.argv[1:]
33 33
34 34 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
35 35
36 36 try:
37 37 result = hooks.svn_post_commit(repo_path, push_data, os.environ)
38 38 sys.exit(result)
39 39 except Exception as error:
40 40 # TODO: johbo: Improve handling of this special case
41 41 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
42 42 raise
43 print 'ERROR:', error
43 print(f'ERROR: {error}')
44 44 sys.exit(1)
45 45 sys.exit(0)
46 46
47 47
48 48
49 49 if __name__ == '__main__':
50 50 main()
@@ -1,52 +1,52 b''
1 1 #!_ENV_
2 2
3 3 import os
4 4 import sys
5 5 path_adjust = [_PATH_]
6 6
7 7 if path_adjust:
8 8 sys.path = path_adjust
9 9
10 10 try:
11 11 from vcsserver import hooks
12 12 except ImportError:
13 13 if os.environ.get('RC_DEBUG_SVN_HOOK'):
14 14 import traceback
15 print traceback.format_exc()
15 print(traceback.format_exc())
16 16 hooks = None
17 17
18 18
19 19 # TIMESTAMP: _DATE_
20 20 RC_HOOK_VER = '_TMPL_'
21 21
22 22
23 23 def main():
24 24 if os.environ.get('SSH_READ_ONLY') == '1':
25 25 sys.stderr.write('Only read-only access is allowed')
26 26 sys.exit(1)
27 27
28 28 if hooks is None:
29 29 # exit with success if we cannot import vcsserver.hooks !!
30 30 # this allows pushing to this repo even without rhodecode
31 31 sys.exit(0)
32 32 if os.environ.get('RC_SKIP_HOOKS') or os.environ.get('RC_SKIP_SVN_HOOKS'):
33 33 sys.exit(0)
34 34 repo_path = os.getcwd()
35 35 push_data = sys.argv[1:]
36 36
37 37 os.environ['RC_HOOK_VER'] = RC_HOOK_VER
38 38
39 39 try:
40 40 result = hooks.svn_pre_commit(repo_path, push_data, os.environ)
41 41 sys.exit(result)
42 42 except Exception as error:
43 43 # TODO: johbo: Improve handling of this special case
44 44 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
45 45 raise
46 print 'ERROR:', error
46 print(f'ERROR: {error}')
47 47 sys.exit(1)
48 48 sys.exit(0)
49 49
50 50
51 51 if __name__ == '__main__':
52 52 main()
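
All four templates honor the same skip switches, so a maintenance job could bypass the RhodeCode hooks by exporting them before pushing; a sketch (env var names are taken from the templates above, the push command is illustrative):

    import os
    import subprocess

    env = dict(os.environ, RC_SKIP_HOOKS='1')  # or RC_SKIP_GIT_HOOKS / RC_SKIP_SVN_HOOKS
    subprocess.run(['git', 'push', 'origin', 'master'], env=env, check=True)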
@@ -1,729 +1,795 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
8 6 # the Free Software Foundation; either version 3 of the License, or
9 7 # (at your option) any later version.
10 8 #
11 9 # This program is distributed in the hope that it will be useful,
12 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 12 # GNU General Public License for more details.
15 13 #
16 14 # You should have received a copy of the GNU General Public License
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20 18 import io
21 19 import os
22 20 import sys
23 21 import logging
24 22 import collections
25 import importlib
26 23 import base64
24 import msgpack
25 import dataclasses
26 import pygit2
27 27
28 from httplib import HTTPConnection
29
28 import http.client
29 from celery import Celery
30 30
31 31 import mercurial.scmutil
32 32 import mercurial.node
33 import simplejson as json
34 33
34 from vcsserver.lib.rc_json import json
35 35 from vcsserver import exceptions, subprocessio, settings
36 from vcsserver.str_utils import ascii_str, safe_str
37 from vcsserver.remote.git_remote import Repository
36 38
39 celery_app = Celery('__vcsserver__')
37 40 log = logging.getLogger(__name__)
38 41
39 42
40 class HooksHttpClient(object):
43 class HooksHttpClient:
44 proto = 'msgpack.v1'
41 45 connection = None
42 46
43 47 def __init__(self, hooks_uri):
44 48 self.hooks_uri = hooks_uri
45 49
50 def __repr__(self):
51 return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})'
52
46 53 def __call__(self, method, extras):
47 connection = HTTPConnection(self.hooks_uri)
48 body = self._serialize(method, extras)
49 try:
50 connection.request('POST', '/', body)
51 except Exception:
52 log.error('Hooks calling Connection failed on %s', connection.__dict__)
53 raise
54 response = connection.getresponse()
55
56 response_data = response.read()
54 connection = http.client.HTTPConnection(self.hooks_uri)
55 # binary msgpack body
56 headers, body = self._serialize(method, extras)
57 log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri)
57 58
58 59 try:
59 return json.loads(response_data)
60 except Exception:
61 log.exception('Failed to decode hook response json data. '
62 'response_code:%s, raw_data:%s',
63 response.status, response_data)
64 raise
60 try:
61 connection.request('POST', '/', body, headers)
62 except Exception as error:
63 log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error)
64 raise
65 65
66 def _serialize(self, hook_name, extras):
66 response = connection.getresponse()
67 try:
68 return msgpack.load(response)
69 except Exception:
70 response_data = response.read()
71 log.exception('Failed to decode hook response msgpack data. '
72 'response_code:%s, raw_data:%s',
73 response.status, response_data)
74 raise
75 finally:
76 connection.close()
77
78 @classmethod
79 def _serialize(cls, hook_name, extras):
67 80 data = {
68 81 'method': hook_name,
69 82 'extras': extras
70 83 }
71 return json.dumps(data)
84 headers = {
85 "rc-hooks-protocol": cls.proto,
86 "Connection": "keep-alive"
87 }
88 return headers, msgpack.packb(data)
72 89
73 90
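# A quick round-trip of the msgpack.v1 body built by _serialize() above
# (the extras payload here is illustrative):
#
#   headers, body = HooksHttpClient._serialize('pre_push', {'username': 'admin'})
#   assert headers['rc-hooks-protocol'] == 'msgpack.v1'
#   assert msgpack.unpackb(body) == {'method': 'pre_push', 'extras': {'username': 'admin'}}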
74 class HooksDummyClient(object):
75 def __init__(self, hooks_module):
76 self._hooks_module = importlib.import_module(hooks_module)
91 class HooksCeleryClient:
92 TASK_TIMEOUT = 60 # time in seconds
77 93
78 def __call__(self, hook_name, extras):
79 with self._hooks_module.Hooks() as hooks:
80 return getattr(hooks, hook_name)(extras)
94 def __init__(self, queue, backend):
95 celery_app.config_from_object({
96 'broker_url': queue, 'result_backend': backend,
97 'broker_connection_retry_on_startup': True,
98 'task_serializer': 'msgpack',
99 'accept_content': ['json', 'msgpack'],
100 'result_serializer': 'msgpack',
101 'result_accept_content': ['json', 'msgpack']
102 })
103 self.celery_app = celery_app
104
105 def __call__(self, method, extras):
106 inquired_task = self.celery_app.signature(
107 f'rhodecode.lib.celerylib.tasks.{method}'
108 )
109 return inquired_task.delay(extras).get(timeout=self.TASK_TIMEOUT)
81 110
82 111
83 class HooksShadowRepoClient(object):
112 class HooksShadowRepoClient:
84 113
85 114 def __call__(self, hook_name, extras):
86 115 return {'output': '', 'status': 0}
87 116
88 117
89 class RemoteMessageWriter(object):
118 class RemoteMessageWriter:
90 119 """Writer base class."""
91 120 def write(self, message):
92 121 raise NotImplementedError()
93 122
94 123
95 124 class HgMessageWriter(RemoteMessageWriter):
96 125 """Writer that knows how to send messages to mercurial clients."""
97 126
98 127 def __init__(self, ui):
99 128 self.ui = ui
100 129
101 def write(self, message):
130 def write(self, message: str):
102 131 # TODO: Check why the quiet flag is set by default.
103 132 old = self.ui.quiet
104 133 self.ui.quiet = False
105 134 self.ui.status(message.encode('utf-8'))
106 135 self.ui.quiet = old
107 136
108 137
109 138 class GitMessageWriter(RemoteMessageWriter):
110 139 """Writer that knows how to send messages to git clients."""
111 140
112 141 def __init__(self, stdout=None):
113 142 self.stdout = stdout or sys.stdout
114 143
115 def write(self, message):
116 self.stdout.write(message.encode('utf-8'))
144 def write(self, message: str):
145 self.stdout.write(message)
117 146
118 147
119 148 class SvnMessageWriter(RemoteMessageWriter):
120 149 """Writer that knows how to send messages to svn clients."""
121 150
122 151 def __init__(self, stderr=None):
123 152 # SVN needs data sent to stderr for back-to-client messaging
124 153 self.stderr = stderr or sys.stderr
125 154
126 155 def write(self, message):
127 156 self.stderr.write(message.encode('utf-8'))
128 157
129 158
130 159 def _handle_exception(result):
131 160 exception_class = result.get('exception')
132 161 exception_traceback = result.get('exception_traceback')
162 log.debug('Handling hook-call exception: %s', exception_class)
133 163
134 164 if exception_traceback:
135 165 log.error('Got traceback from remote call:%s', exception_traceback)
136 166
137 167 if exception_class == 'HTTPLockedRC':
138 168 raise exceptions.RepositoryLockedException()(*result['exception_args'])
139 169 elif exception_class == 'HTTPBranchProtected':
140 170 raise exceptions.RepositoryBranchProtectedException()(*result['exception_args'])
141 171 elif exception_class == 'RepositoryError':
142 172 raise exceptions.VcsException()(*result['exception_args'])
143 173 elif exception_class:
144 raise Exception('Got remote exception "%s" with args "%s"' %
145 (exception_class, result['exception_args']))
174 raise Exception(
175 f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
176 )
146 177
147 178
148 179 def _get_hooks_client(extras):
149 180 hooks_uri = extras.get('hooks_uri')
181 task_queue = extras.get('task_queue')
182 task_backend = extras.get('task_backend')
150 183 is_shadow_repo = extras.get('is_shadow_repo')
184
151 185 if hooks_uri:
152 return HooksHttpClient(extras['hooks_uri'])
186 return HooksHttpClient(hooks_uri)
187 elif task_queue and task_backend:
188 return HooksCeleryClient(task_queue, task_backend)
153 189 elif is_shadow_repo:
154 190 return HooksShadowRepoClient()
155 191 else:
156 return HooksDummyClient(extras['hooks_module'])
192 raise Exception("Hooks client not found!")
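# Selection order above, based on which keys `extras` carries:
# 'hooks_uri' -> HooksHttpClient (HTTP + msgpack),
# 'task_queue' + 'task_backend' -> HooksCeleryClient (Celery dispatch),
# 'is_shadow_repo' -> HooksShadowRepoClient (no-op success for shadow repos),
# otherwise fail hard, since every hook call must be routed somewhere.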
157 193
158 194
159 195 def _call_hook(hook_name, extras, writer):
160 196 hooks_client = _get_hooks_client(extras)
161 197 log.debug('Hooks, using client:%s', hooks_client)
162 198 result = hooks_client(hook_name, extras)
163 199 log.debug('Hooks got result: %s', result)
164
165 200 _handle_exception(result)
166 201 writer.write(result['output'])
167 202
168 203 return result['status']
169 204
170 205
171 206 def _extras_from_ui(ui):
172 hook_data = ui.config('rhodecode', 'RC_SCM_DATA')
207 hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
173 208 if not hook_data:
174 209 # maybe it's inside environ ?
175 210 env_hook_data = os.environ.get('RC_SCM_DATA')
176 211 if env_hook_data:
177 212 hook_data = env_hook_data
178 213
179 214 extras = {}
180 215 if hook_data:
181 216 extras = json.loads(hook_data)
182 217 return extras
183 218
184 219
185 220 def _rev_range_hash(repo, node, check_heads=False):
186 221 from vcsserver.hgcompat import get_ctx
187 222
188 223 commits = []
189 224 revs = []
190 225 start = get_ctx(repo, node).rev()
191 226 end = len(repo)
192 227 for rev in range(start, end):
193 228 revs.append(rev)
194 229 ctx = get_ctx(repo, rev)
195 commit_id = mercurial.node.hex(ctx.node())
196 branch = ctx.branch()
230 commit_id = ascii_str(mercurial.node.hex(ctx.node()))
231 branch = safe_str(ctx.branch())
197 232 commits.append((commit_id, branch))
198 233
199 234 parent_heads = []
200 235 if check_heads:
201 236 parent_heads = _check_heads(repo, start, end, revs)
202 237 return commits, parent_heads
203 238
204 239
205 240 def _check_heads(repo, start, end, commits):
206 241 from vcsserver.hgcompat import get_ctx
207 242 changelog = repo.changelog
208 243 parents = set()
209 244
210 245 for new_rev in commits:
211 246 for p in changelog.parentrevs(new_rev):
212 247 if p == mercurial.node.nullrev:
213 248 continue
214 249 if p < start:
215 250 parents.add(p)
216 251
217 252 for p in parents:
218 253 branch = get_ctx(repo, p).branch()
219 254 # The heads descending from that parent, on the same branch
220 parent_heads = set([p])
221 reachable = set([p])
222 for x in xrange(p + 1, end):
255 parent_heads = {p}
256 reachable = {p}
257 for x in range(p + 1, end):
223 258 if get_ctx(repo, x).branch() != branch:
224 259 continue
225 260 for pp in changelog.parentrevs(x):
226 261 if pp in reachable:
227 262 reachable.add(x)
228 263 parent_heads.discard(pp)
229 264 parent_heads.add(x)
230 265 # More than one head? Suggest merging
231 266 if len(parent_heads) > 1:
232 267 return list(parent_heads)
233 268
234 269 return []
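# Worked example with hypothetical revision numbers: given linear history
# 0-1-2 on branch 'default', pushing a new rev 3 whose parent is rev 1
# leaves two heads (2 and 3) descending from parent 1, so [2, 3] is
# returned and the caller can reject the push or suggest a merge.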
235 270
236 271
237 272 def _get_git_env():
238 273 env = {}
239 274 for k, v in os.environ.items():
240 275 if k.startswith('GIT'):
241 276 env[k] = v
242 277
243 278 # serialized version
244 279 return [(k, v) for k, v in env.items()]
245 280
246 281
247 282 def _get_hg_env(old_rev, new_rev, txnid, repo_path):
248 283 env = {}
249 284 for k, v in os.environ.items():
250 285 if k.startswith('HG'):
251 286 env[k] = v
252 287
253 288 env['HG_NODE'] = old_rev
254 289 env['HG_NODE_LAST'] = new_rev
255 290 env['HG_TXNID'] = txnid
256 291 env['HG_PENDING'] = repo_path
257 292
258 293 return [(k, v) for k, v in env.items()]
259 294
260 295
261 296 def repo_size(ui, repo, **kwargs):
262 297 extras = _extras_from_ui(ui)
263 298 return _call_hook('repo_size', extras, HgMessageWriter(ui))
264 299
265 300
266 301 def pre_pull(ui, repo, **kwargs):
267 302 extras = _extras_from_ui(ui)
268 303 return _call_hook('pre_pull', extras, HgMessageWriter(ui))
269 304
270 305
271 306 def pre_pull_ssh(ui, repo, **kwargs):
272 307 extras = _extras_from_ui(ui)
273 308 if extras and extras.get('SSH'):
274 309 return pre_pull(ui, repo, **kwargs)
275 310 return 0
276 311
277 312
278 313 def post_pull(ui, repo, **kwargs):
279 314 extras = _extras_from_ui(ui)
280 315 return _call_hook('post_pull', extras, HgMessageWriter(ui))
281 316
282 317
283 318 def post_pull_ssh(ui, repo, **kwargs):
284 319 extras = _extras_from_ui(ui)
285 320 if extras and extras.get('SSH'):
286 321 return post_pull(ui, repo, **kwargs)
287 322 return 0
288 323
289 324
290 325 def pre_push(ui, repo, node=None, **kwargs):
291 326 """
292 327 Mercurial pre_push hook
293 328 """
294 329 extras = _extras_from_ui(ui)
295 330 detect_force_push = extras.get('detect_force_push')
296 331
297 332 rev_data = []
298 if node and kwargs.get('hooktype') == 'pretxnchangegroup':
333 hook_type: str = safe_str(kwargs.get('hooktype'))
334
335 if node and hook_type == 'pretxnchangegroup':
299 336 branches = collections.defaultdict(list)
300 337 commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
301 338 for commit_id, branch in commits:
302 339 branches[branch].append(commit_id)
303 340
304 341 for branch, commits in branches.items():
305 old_rev = kwargs.get('node_last') or commits[0]
342 old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
306 343 rev_data.append({
307 344 'total_commits': len(commits),
308 345 'old_rev': old_rev,
309 346 'new_rev': commits[-1],
310 347 'ref': '',
311 348 'type': 'branch',
312 349 'name': branch,
313 350 })
314 351
315 352 for push_ref in rev_data:
316 353 push_ref['multiple_heads'] = _heads
317 354
318 355 repo_path = os.path.join(
319 356 extras.get('repo_store', ''), extras.get('repository', ''))
320 357 push_ref['hg_env'] = _get_hg_env(
321 358 old_rev=push_ref['old_rev'],
322 new_rev=push_ref['new_rev'], txnid=kwargs.get('txnid'),
359 new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
323 360 repo_path=repo_path)
324 361
325 extras['hook_type'] = kwargs.get('hooktype', 'pre_push')
362 extras['hook_type'] = hook_type or 'pre_push'
326 363 extras['commit_ids'] = rev_data
327 364
328 365 return _call_hook('pre_push', extras, HgMessageWriter(ui))
329 366
330 367
331 368 def pre_push_ssh(ui, repo, node=None, **kwargs):
332 369 extras = _extras_from_ui(ui)
333 370 if extras.get('SSH'):
334 371 return pre_push(ui, repo, node, **kwargs)
335 372
336 373 return 0
337 374
338 375
339 376 def pre_push_ssh_auth(ui, repo, node=None, **kwargs):
340 377 """
341 378 Mercurial pre_push hook for SSH
342 379 """
343 380 extras = _extras_from_ui(ui)
344 381 if extras.get('SSH'):
345 382 permission = extras['SSH_PERMISSIONS']
346 383
347 384 if 'repository.write' == permission or 'repository.admin' == permission:
348 385 return 0
349 386
350 387 # non-zero ret code
351 388 return 1
352 389
353 390 return 0
354 391
355 392
356 393 def post_push(ui, repo, node, **kwargs):
357 394 """
358 395 Mercurial post_push hook
359 396 """
360 397 extras = _extras_from_ui(ui)
361 398
362 399 commit_ids = []
363 400 branches = []
364 401 bookmarks = []
365 402 tags = []
403 hook_type: str = safe_str(kwargs.get('hooktype'))
366 404
367 405 commits, _heads = _rev_range_hash(repo, node)
368 406 for commit_id, branch in commits:
369 407 commit_ids.append(commit_id)
370 408 if branch not in branches:
371 409 branches.append(branch)
372 410
373 if hasattr(ui, '_rc_pushkey_branches'):
374 bookmarks = ui._rc_pushkey_branches
411 if hasattr(ui, '_rc_pushkey_bookmarks'):
412 bookmarks = ui._rc_pushkey_bookmarks
375 413
376 extras['hook_type'] = kwargs.get('hooktype', 'post_push')
414 extras['hook_type'] = hook_type or 'post_push'
377 415 extras['commit_ids'] = commit_ids
416
378 417 extras['new_refs'] = {
379 418 'branches': branches,
380 419 'bookmarks': bookmarks,
381 420 'tags': tags
382 421 }
383 422
384 423 return _call_hook('post_push', extras, HgMessageWriter(ui))
385 424
386 425
387 426 def post_push_ssh(ui, repo, node, **kwargs):
388 427 """
389 428 Mercurial post_push hook for SSH
390 429 """
391 430 if _extras_from_ui(ui).get('SSH'):
392 431 return post_push(ui, repo, node, **kwargs)
393 432 return 0
394 433
395 434
396 435 def key_push(ui, repo, **kwargs):
397 436 from vcsserver.hgcompat import get_ctx
398 if kwargs['new'] != '0' and kwargs['namespace'] == 'bookmarks':
437
438 if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
399 439 # store new bookmarks in our UI object, to be propagated later to post_push
400 ui._rc_pushkey_branches = get_ctx(repo, kwargs['key']).bookmarks()
440 ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
401 441 return
402 442
403 443
404 444 # backward compat
405 445 log_pull_action = post_pull
406 446
407 447 # backward compat
408 448 log_push_action = post_push
409 449
410 450
411 451 def handle_git_pre_receive(unused_repo_path, unused_revs, unused_env):
412 452 """
413 453 Old hook name: keep here for backward compatibility.
414 454
415 455 This is only required when the installed git hooks are not upgraded.
416 456 """
417 457 pass
418 458
419 459
420 460 def handle_git_post_receive(unused_repo_path, unused_revs, unused_env):
421 461 """
422 462 Old hook name: keep here for backward compatibility.
423 463
424 464 This is only required when the installed git hooks are not upgraded.
425 465 """
426 466 pass
427 467
428 468
429 HookResponse = collections.namedtuple('HookResponse', ('status', 'output'))
469 @dataclasses.dataclass
470 class HookResponse:
471 status: int
472 output: str
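# e.g. HookResponse(0, '') signals success with no client-visible output,
# while a non-zero status signals a hook failure to the caller.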
430 473
431 474
432 def git_pre_pull(extras):
475 def git_pre_pull(extras) -> HookResponse:
433 476 """
434 477 Pre pull hook.
435 478
436 479 :param extras: dictionary containing the keys defined in simplevcs
437 480 :type extras: dict
438 481
439 482 :return: status code of the hook. 0 for success.
440 483 :rtype: int
441 484 """
485
442 486 if 'pull' not in extras['hooks']:
443 487 return HookResponse(0, '')
444 488
445 stdout = io.BytesIO()
489 stdout = io.StringIO()
446 490 try:
447 status = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
491 status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
492
448 493 except Exception as error:
449 status = 128
450 stdout.write('ERROR: %s\n' % str(error))
494 log.exception('Failed to call pre_pull hook')
495 status_code = 128
496 stdout.write(f'ERROR: {error}\n')
451 497
452 return HookResponse(status, stdout.getvalue())
498 return HookResponse(status_code, stdout.getvalue())
453 499
454 500
455 def git_post_pull(extras):
501 def git_post_pull(extras) -> HookResponse:
456 502 """
457 503 Post pull hook.
458 504
459 505 :param extras: dictionary containing the keys defined in simplevcs
460 506 :type extras: dict
461 507
462 508 :return: status code of the hook. 0 for success.
463 509 :rtype: int
464 510 """
465 511 if 'pull' not in extras['hooks']:
466 512 return HookResponse(0, '')
467 513
468 stdout = io.BytesIO()
514 stdout = io.StringIO()
469 515 try:
470 516 status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
471 517 except Exception as error:
472 518 status = 128
473 stdout.write('ERROR: %s\n' % error)
519 stdout.write(f'ERROR: {error}\n')
474 520
475 521 return HookResponse(status, stdout.getvalue())
476 522
477 523
478 524 def _parse_git_ref_lines(revision_lines):
479 525 rev_data = []
480 526 for revision_line in revision_lines or []:
481 527 old_rev, new_rev, ref = revision_line.strip().split(' ')
482 528 ref_data = ref.split('/', 2)
483 529 if ref_data[1] in ('tags', 'heads'):
484 530 rev_data.append({
485 531 # NOTE(marcink):
486 532 # we're unable to tell total_commits for git at this point
487 533 # but we set the variable for consistency with the other backends
488 534 'total_commits': -1,
489 535 'old_rev': old_rev,
490 536 'new_rev': new_rev,
491 537 'ref': ref,
492 538 'type': ref_data[1],
493 539 'name': ref_data[2],
494 540 })
495 541 return rev_data
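# Illustrative input/output (SHAs shortened for readability):
#
#   _parse_git_ref_lines(['aaa111 bbb222 refs/heads/main'])
#   => [{'total_commits': -1, 'old_rev': 'aaa111', 'new_rev': 'bbb222',
#        'ref': 'refs/heads/main', 'type': 'heads', 'name': 'main'}]
#
# Ref lines outside refs/heads/* and refs/tags/* are silently skipped.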
496 542
497 543
498 def git_pre_receive(unused_repo_path, revision_lines, env):
544 def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
499 545 """
500 546 Pre push hook.
501 547
502 :param extras: dictionary containing the keys defined in simplevcs
503 :type extras: dict
504
505 548 :return: status code of the hook. 0 for success.
506 :rtype: int
507 549 """
508 550 extras = json.loads(env['RC_SCM_DATA'])
509 551 rev_data = _parse_git_ref_lines(revision_lines)
510 552 if 'push' not in extras['hooks']:
511 553 return 0
512 554 empty_commit_id = '0' * 40
513 555
514 556 detect_force_push = extras.get('detect_force_push')
515 557
516 558 for push_ref in rev_data:
517 559 # store our git-env which holds the temp store
518 560 push_ref['git_env'] = _get_git_env()
519 561 push_ref['pruned_sha'] = ''
520 562 if not detect_force_push:
521 563 # don't check for forced-push when we don't need to
522 564 continue
523 565
524 566 type_ = push_ref['type']
525 567 new_branch = push_ref['old_rev'] == empty_commit_id
526 568 delete_branch = push_ref['new_rev'] == empty_commit_id
527 569 if type_ == 'heads' and not (new_branch or delete_branch):
528 570 old_rev = push_ref['old_rev']
529 571 new_rev = push_ref['new_rev']
530 cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, '^{}'.format(new_rev)]
572 cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, f'^{new_rev}']
531 573 stdout, stderr = subprocessio.run_command(
532 574 cmd, env=os.environ.copy())
533 575 # non-empty output means there are non-reachable objects, so a forced push was used
534 576 if stdout:
535 577 push_ref['pruned_sha'] = stdout.splitlines()
536 578
537 579 extras['hook_type'] = 'pre_receive'
538 580 extras['commit_ids'] = rev_data
539 return _call_hook('pre_push', extras, GitMessageWriter())
581
582 stdout = sys.stdout
583 status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
584
585 return status_code
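# The forced-push detection above hinges on `git rev-list <old> ^<new>`:
# any commits reachable from the old tip but not from the new one would be
# discarded by the update, so non-empty output marks the ref as
# force-pushed and the pruned SHAs are forwarded to the hooks server.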
540 586
541 587
542 def git_post_receive(unused_repo_path, revision_lines, env):
588 def git_post_receive(unused_repo_path, revision_lines, env) -> int:
543 589 """
544 590 Post push hook.
545 591
546 :param extras: dictionary containing the keys defined in simplevcs
547 :type extras: dict
548
549 592 :return: status code of the hook. 0 for success.
550 :rtype: int
551 593 """
552 594 extras = json.loads(env['RC_SCM_DATA'])
553 595 if 'push' not in extras['hooks']:
554 596 return 0
555 597
556 598 rev_data = _parse_git_ref_lines(revision_lines)
557 599
558 600 git_revs = []
559 601
560 602 # N.B.(skreft): it is ok to just call git, as git before calling a
561 603 # subcommand sets the PATH environment variable so that it points to the
562 604 # correct version of the git executable.
563 605 empty_commit_id = '0' * 40
564 606 branches = []
565 607 tags = []
566 608 for push_ref in rev_data:
567 609 type_ = push_ref['type']
568 610
569 611 if type_ == 'heads':
612 # starting new branch case
570 613 if push_ref['old_rev'] == empty_commit_id:
571 # starting new branch case
572 if push_ref['name'] not in branches:
573 branches.append(push_ref['name'])
614 push_ref_name = push_ref['name']
615
616 if push_ref_name not in branches:
617 branches.append(push_ref_name)
574 618
575 # Fix up head revision if needed
576 cmd = [settings.GIT_EXECUTABLE, 'show', 'HEAD']
577 try:
578 subprocessio.run_command(cmd, env=os.environ.copy())
579 except Exception:
580 cmd = [settings.GIT_EXECUTABLE, 'symbolic-ref', 'HEAD',
581 'refs/heads/%s' % push_ref['name']]
582 print("Setting default branch to %s" % push_ref['name'])
583 subprocessio.run_command(cmd, env=os.environ.copy())
619 need_head_set = ''
620 with Repository(os.getcwd()) as repo:
621 try:
622 repo.head
623 except pygit2.GitError:
624 need_head_set = f'refs/heads/{push_ref_name}'
584 625
585 cmd = [settings.GIT_EXECUTABLE, 'for-each-ref',
586 '--format=%(refname)', 'refs/heads/*']
626 if need_head_set:
627 repo.set_head(need_head_set)
628 print(f"Setting default branch to {push_ref_name}")
629
630 cmd = [settings.GIT_EXECUTABLE, 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
587 631 stdout, stderr = subprocessio.run_command(
588 632 cmd, env=os.environ.copy())
589 heads = stdout
633 heads = safe_str(stdout)
590 634 heads = heads.replace(push_ref['ref'], '')
591 635 heads = ' '.join(head for head
592 636 in heads.splitlines() if head) or '.'
593 637 cmd = [settings.GIT_EXECUTABLE, 'log', '--reverse',
594 638 '--pretty=format:%H', '--', push_ref['new_rev'],
595 639 '--not', heads]
596 640 stdout, stderr = subprocessio.run_command(
597 641 cmd, env=os.environ.copy())
598 git_revs.extend(stdout.splitlines())
642 git_revs.extend(list(map(ascii_str, stdout.splitlines())))
643
644 # delete branch case
599 645 elif push_ref['new_rev'] == empty_commit_id:
600 # delete branch case
601 git_revs.append('delete_branch=>%s' % push_ref['name'])
646 git_revs.append(f'delete_branch=>{push_ref["name"]}')
602 647 else:
603 648 if push_ref['name'] not in branches:
604 649 branches.append(push_ref['name'])
605 650
606 651 cmd = [settings.GIT_EXECUTABLE, 'log',
607 '{old_rev}..{new_rev}'.format(**push_ref),
652 f'{push_ref["old_rev"]}..{push_ref["new_rev"]}',
608 653 '--reverse', '--pretty=format:%H']
609 654 stdout, stderr = subprocessio.run_command(
610 655 cmd, env=os.environ.copy())
611 git_revs.extend(stdout.splitlines())
656 # we get bytes from stdout; we need str to be consistent
657 log_revs = list(map(ascii_str, stdout.splitlines()))
658 git_revs.extend(log_revs)
659
660 # Pure pygit2 impl. but still 2-3x slower :/
661 # results = []
662 #
663 # with Repository(os.getcwd()) as repo:
664 # repo_new_rev = repo[push_ref['new_rev']]
665 # repo_old_rev = repo[push_ref['old_rev']]
666 # walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
667 #
668 # for commit in walker:
669 # if commit.id == repo_old_rev.id:
670 # break
671 # results.append(commit.id.hex)
672 # # reverse the order, can't use GIT_SORT_REVERSE
673 # log_revs = results[::-1]
674
612 675 elif type_ == 'tags':
613 676 if push_ref['name'] not in tags:
614 677 tags.append(push_ref['name'])
615 git_revs.append('tag=>%s' % push_ref['name'])
678 git_revs.append(f'tag=>{push_ref["name"]}')
616 679
617 680 extras['hook_type'] = 'post_receive'
618 681 extras['commit_ids'] = git_revs
619 682 extras['new_refs'] = {
620 683 'branches': branches,
621 684 'bookmarks': [],
622 685 'tags': tags,
623 686 }
624 687
688 stdout = sys.stdout
689
625 690 if 'repo_size' in extras['hooks']:
626 691 try:
627 _call_hook('repo_size', extras, GitMessageWriter())
628 except:
692 _call_hook('repo_size', extras, GitMessageWriter(stdout))
693 except Exception:
629 694 pass
630 695
631 return _call_hook('post_push', extras, GitMessageWriter())
696 status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
697 return status_code
632 698
633 699
634 700 def _get_extras_from_txn_id(path, txn_id):
635 701 extras = {}
636 702 try:
637 703 cmd = [settings.SVNLOOK_EXECUTABLE, 'pget',
638 704 '-t', txn_id,
639 705 '--revprop', path, 'rc-scm-extras']
640 706 stdout, stderr = subprocessio.run_command(
641 707 cmd, env=os.environ.copy())
642 708 extras = json.loads(base64.urlsafe_b64decode(stdout))
643 709 except Exception:
644 710 log.exception('Failed to extract extras info from txn_id')
645 711
646 712 return extras
647 713
648 714
649 715 def _get_extras_from_commit_id(commit_id, path):
650 716 extras = {}
651 717 try:
652 718 cmd = [settings.SVNLOOK_EXECUTABLE, 'pget',
653 719 '-r', commit_id,
654 720 '--revprop', path, 'rc-scm-extras']
655 721 stdout, stderr = subprocessio.run_command(
656 722 cmd, env=os.environ.copy())
657 723 extras = json.loads(base64.urlsafe_b64decode(stdout))
658 724 except Exception:
659 725 log.exception('Failed to extract extras info from commit_id')
660 726
661 727 return extras
662 728
663 729
664 730 def svn_pre_commit(repo_path, commit_data, env):
665 731 path, txn_id = commit_data
666 732 branches = []
667 733 tags = []
668 734
669 735 if env.get('RC_SCM_DATA'):
670 736 extras = json.loads(env['RC_SCM_DATA'])
671 737 else:
672 738 # fallback method to read from TXN-ID stored data
673 739 extras = _get_extras_from_txn_id(path, txn_id)
674 740 if not extras:
675 741 return 0
676 742
677 743 extras['hook_type'] = 'pre_commit'
678 744 extras['commit_ids'] = [txn_id]
679 745 extras['txn_id'] = txn_id
680 746 extras['new_refs'] = {
681 747 'total_commits': 1,
682 748 'branches': branches,
683 749 'bookmarks': [],
684 750 'tags': tags,
685 751 }
686 752
687 753 return _call_hook('pre_push', extras, SvnMessageWriter())
688 754
689 755
690 756 def svn_post_commit(repo_path, commit_data, env):
691 757 """
692 758 commit_data is path, rev, txn_id
693 759 """
694 760 if len(commit_data) == 3:
695 761 path, commit_id, txn_id = commit_data
696 762 elif len(commit_data) == 2:
697 763 log.error('Failed to extract txn_id from commit_data using legacy method. '
698 764 'Some functionality might be limited')
699 765 path, commit_id = commit_data
700 766 txn_id = None
701 767
702 768 branches = []
703 769 tags = []
704 770
705 771 if env.get('RC_SCM_DATA'):
706 772 extras = json.loads(env['RC_SCM_DATA'])
707 773 else:
708 774 # fallback method to read from TXN-ID stored data
709 775 extras = _get_extras_from_commit_id(commit_id, path)
710 776 if not extras:
711 777 return 0
712 778
713 779 extras['hook_type'] = 'post_commit'
714 780 extras['commit_ids'] = [commit_id]
715 781 extras['txn_id'] = txn_id
716 782 extras['new_refs'] = {
717 783 'branches': branches,
718 784 'bookmarks': [],
719 785 'tags': tags,
720 786 'total_commits': 1,
721 787 }
722 788
723 789 if 'repo_size' in extras['hooks']:
724 790 try:
725 791 _call_hook('repo_size', extras, SvnMessageWriter())
726 792 except Exception:
727 793 pass
728 794
729 795 return _call_hook('post_push', extras, SvnMessageWriter())
@@ -1,705 +1,775 @@
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 import io
18 19 import os
20 import platform
19 21 import sys
20 import base64
21 22 import locale
22 23 import logging
23 24 import uuid
25 import time
24 26 import wsgiref.util
25 import traceback
26 27 import tempfile
27 28 import psutil
29
28 30 from itertools import chain
29 from cStringIO import StringIO
30 31
31 import simplejson as json
32 32 import msgpack
33 import configparser
34
33 35 from pyramid.config import Configurator
34 from pyramid.settings import asbool, aslist
35 36 from pyramid.wsgi import wsgiapp
36 from pyramid.compat import configparser
37 37 from pyramid.response import Response
38 38
39 from vcsserver.utils import safe_int
39 from vcsserver.base import BytesEnvelope, BinaryEnvelope
40 from vcsserver.lib.rc_json import json
41 from vcsserver.config.settings_maker import SettingsMaker
42 from vcsserver.str_utils import safe_int
43 from vcsserver.lib.statsd_client import StatsdClient
44 from vcsserver.tweens.request_wrapper import get_headers_call_context
45
46 import vcsserver
47 from vcsserver import remote_wsgi, scm_app, settings, hgpatches
48 from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
49 from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
50 from vcsserver.echo_stub.echo_app import EchoApp
51 from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
52 from vcsserver.lib.exc_tracking import store_exception, format_exc
53 from vcsserver.server import VcsServer
54
55 strict_vcs = True
56
57 git_import_err = None
58 try:
59 from vcsserver.remote.git_remote import GitFactory, GitRemote
60 except ImportError as e:
61 GitFactory = None
62 GitRemote = None
63 git_import_err = e
64 if strict_vcs:
65 raise
66
67
68 hg_import_err = None
69 try:
70 from vcsserver.remote.hg_remote import MercurialFactory, HgRemote
71 except ImportError as e:
72 MercurialFactory = None
73 HgRemote = None
74 hg_import_err = e
75 if strict_vcs:
76 raise
77
78
79 svn_import_err = None
80 try:
81 from vcsserver.remote.svn_remote import SubversionFactory, SvnRemote
82 except ImportError as e:
83 SubversionFactory = None
84 SvnRemote = None
85 svn_import_err = e
86 if strict_vcs:
87 raise
40 88
41 89 log = logging.getLogger(__name__)
42 90
43 91 # due to Mercurial/glibc2.27 problems we need to detect if locale settings are
44 92 # causing problems and "fix" them if they do, falling back to LC_ALL = C
45 93
46 94 try:
47 95 locale.setlocale(locale.LC_ALL, '')
48 96 except locale.Error as e:
49 97 log.error(
50 98 'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
51 99 os.environ['LC_ALL'] = 'C'
52 100
53 import vcsserver
54 from vcsserver import remote_wsgi, scm_app, settings, hgpatches
55 from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
56 from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
57 from vcsserver.echo_stub.echo_app import EchoApp
58 from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
59 from vcsserver.lib.exc_tracking import store_exception
60 from vcsserver.server import VcsServer
61
62 try:
63 from vcsserver.git import GitFactory, GitRemote
64 except ImportError:
65 GitFactory = None
66 GitRemote = None
67
68 try:
69 from vcsserver.hg import MercurialFactory, HgRemote
70 except ImportError:
71 MercurialFactory = None
72 HgRemote = None
73
74 try:
75 from vcsserver.svn import SubversionFactory, SvnRemote
76 except ImportError:
77 SubversionFactory = None
78 SvnRemote = None
79
80 101
81 102 def _is_request_chunked(environ):
82 103 stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
83 104 return stream
84 105
85 106
86 def _int_setting(settings, name, default):
87 settings[name] = int(settings.get(name, default))
88 return settings[name]
89
90
91 def _bool_setting(settings, name, default):
92 input_val = settings.get(name, default)
93 if isinstance(input_val, unicode):
94 input_val = input_val.encode('utf8')
95 settings[name] = asbool(input_val)
96 return settings[name]
97
98
99 def _list_setting(settings, name, default):
100 raw_value = settings.get(name, default)
101
102 # Otherwise we assume it uses pyramids space/newline separation.
103 settings[name] = aslist(raw_value)
104 return settings[name]
105
106
107 def _string_setting(settings, name, default, lower=True, default_when_empty=False):
108 value = settings.get(name, default)
109
110 if default_when_empty and not value:
111 # use default value when value is empty
112 value = default
113
114 if lower:
115 value = value.lower()
116 settings[name] = value
117 return settings[name]
118
119
120 107 def log_max_fd():
121 108 try:
122 109 maxfd = psutil.Process().rlimit(psutil.RLIMIT_NOFILE)[1]
123 110 log.info('Max file descriptors value: %s', maxfd)
124 111 except Exception:
125 112 pass
126 113
127 114
128 class VCS(object):
115 class VCS:
129 116 def __init__(self, locale_conf=None, cache_config=None):
130 117 self.locale = locale_conf
131 118 self.cache_config = cache_config
132 119 self._configure_locale()
133 120
134 121 log_max_fd()
135 122
136 123 if GitFactory and GitRemote:
137 124 git_factory = GitFactory()
138 125 self._git_remote = GitRemote(git_factory)
139 126 else:
140 log.info("Git client import failed")
127 log.error("Git client import failed: %s", git_import_err)
141 128
142 129 if MercurialFactory and HgRemote:
143 130 hg_factory = MercurialFactory()
144 131 self._hg_remote = HgRemote(hg_factory)
145 132 else:
146 log.info("Mercurial client import failed")
133 log.error("Mercurial client import failed: %s", hg_import_err)
147 134
148 135 if SubversionFactory and SvnRemote:
149 136 svn_factory = SubversionFactory()
150 137
151 138 # hg factory is used for svn url validation
152 139 hg_factory = MercurialFactory()
153 140 self._svn_remote = SvnRemote(svn_factory, hg_factory=hg_factory)
154 141 else:
155 log.info("Subversion client import failed")
142 log.error("Subversion client import failed: %s", svn_import_err)
156 143
157 144 self._vcsserver = VcsServer()
158 145
159 146 def _configure_locale(self):
160 147 if self.locale:
161 148 log.info('Setting locale `LC_ALL` to %s', self.locale)
162 149 else:
163 log.info(
164 'Configuring locale subsystem based on environment variables')
150 log.info('Configuring locale subsystem based on environment variables')
165 151 try:
166 152 # If self.locale is the empty string, then the locale
167 153 # module will use the environment variables. See the
168 154 # documentation of the package `locale`.
169 155 locale.setlocale(locale.LC_ALL, self.locale)
170 156
171 157 language_code, encoding = locale.getlocale()
172 158 log.info(
173 159 'Locale set to language code "%s" with encoding "%s".',
174 160 language_code, encoding)
175 161 except locale.Error:
176 log.exception(
177 'Cannot set locale, not configuring the locale system')
162 log.exception('Cannot set locale, not configuring the locale system')
178 163
179 164
180 class WsgiProxy(object):
165 class WsgiProxy:
181 166 def __init__(self, wsgi):
182 167 self.wsgi = wsgi
183 168
184 169 def __call__(self, environ, start_response):
185 170 input_data = environ['wsgi.input'].read()
186 171 input_data = msgpack.unpackb(input_data)
187 172
188 173 error = None
189 174 try:
190 175 data, status, headers = self.wsgi.handle(
191 176 input_data['environment'], input_data['input_data'],
192 177 *input_data['args'], **input_data['kwargs'])
193 178 except Exception as e:
194 179 data, status, headers = [], None, None
195 180 error = {
196 181 'message': str(e),
197 182 '_vcs_kind': getattr(e, '_vcs_kind', None)
198 183 }
199 184
200 185 start_response(200, {})
201 186 return self._iterator(error, status, headers, data)
202 187
203 188 def _iterator(self, error, status, headers, data):
204 189 initial_data = [
205 190 error,
206 191 status,
207 192 headers,
208 193 ]
209 194
210 195 for d in chain(initial_data, data):
211 196 yield msgpack.packb(d)
212 197
213 198
214 199 def not_found(request):
215 200 return {'status': '404 NOT FOUND'}
216 201
217 202
218 class VCSViewPredicate(object):
203 class VCSViewPredicate:
219 204 def __init__(self, val, config):
220 205 self.remotes = val
221 206
222 207 def text(self):
223 return 'vcs view method = %s' % (self.remotes.keys(),)
208 return f'vcs view method = {list(self.remotes.keys())}'
224 209
225 210 phash = text
226 211
227 212 def __call__(self, context, request):
228 213 """
229 214 View predicate that returns true if given backend is supported by
230 215 defined remotes.
231 216 """
232 217 backend = request.matchdict.get('backend')
233 218 return backend in self.remotes
234 219
235 220
236 class HTTPApplication(object):
221 class HTTPApplication:
237 222 ALLOWED_EXCEPTIONS = ('KeyError', 'URLError')
238 223
239 224 remote_wsgi = remote_wsgi
240 225 _use_echo_app = False
241 226
242 227 def __init__(self, settings=None, global_config=None):
243 self._sanitize_settings_and_apply_defaults(settings)
244 228
245 229 self.config = Configurator(settings=settings)
230 # Init our statsd at very start
231 self.config.registry.statsd = StatsdClient.statsd
232 self.config.registry.vcs_call_context = {}
233
246 234 self.global_config = global_config
247 235 self.config.include('vcsserver.lib.rc_cache')
236 self.config.include('vcsserver.lib.rc_cache.archive_cache')
248 237
249 238 settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
250 239 vcs = VCS(locale_conf=settings_locale, cache_config=settings)
251 240 self._remotes = {
252 241 'hg': vcs._hg_remote,
253 242 'git': vcs._git_remote,
254 243 'svn': vcs._svn_remote,
255 244 'server': vcs._vcsserver,
256 245 }
257 246 if settings.get('dev.use_echo_app', 'false').lower() == 'true':
258 247 self._use_echo_app = True
259 248 log.warning("Using EchoApp for VCS operations.")
260 249 self.remote_wsgi = remote_wsgi_stub
261 250
262 251 self._configure_settings(global_config, settings)
263 252
264 253 self._configure()
265 254
266 255 def _configure_settings(self, global_config, app_settings):
267 256 """
268 257 Configure the settings module.
269 258 """
270 259 settings_merged = global_config.copy()
271 260 settings_merged.update(app_settings)
272 261
273 262 git_path = app_settings.get('git_path', None)
274 263 if git_path:
275 264 settings.GIT_EXECUTABLE = git_path
276 265 binary_dir = app_settings.get('core.binary_dir', None)
277 266 if binary_dir:
278 267 settings.BINARY_DIR = binary_dir
279 268
280 269 # Store the settings to make them available to other modules.
281 270 vcsserver.PYRAMID_SETTINGS = settings_merged
282 271 vcsserver.CONFIG = settings_merged
283 272
284 def _sanitize_settings_and_apply_defaults(self, settings):
285 temp_store = tempfile.gettempdir()
286 default_cache_dir = os.path.join(temp_store, 'rc_cache')
287
288 # save default, cache dir, and use it for all backends later.
289 default_cache_dir = _string_setting(
290 settings,
291 'cache_dir',
292 default_cache_dir, lower=False, default_when_empty=True)
293
294 # ensure we have our dir created
295 if not os.path.isdir(default_cache_dir):
296 os.makedirs(default_cache_dir, mode=0o755)
297
298 # exception store cache
299 _string_setting(
300 settings,
301 'exception_tracker.store_path',
302 temp_store, lower=False, default_when_empty=True)
303
304 # repo_object cache
305 _string_setting(
306 settings,
307 'rc_cache.repo_object.backend',
308 'dogpile.cache.rc.file_namespace', lower=False)
309 _int_setting(
310 settings,
311 'rc_cache.repo_object.expiration_time',
312 30 * 24 * 60 * 60)
313 _string_setting(
314 settings,
315 'rc_cache.repo_object.arguments.filename',
316 os.path.join(default_cache_dir, 'vcsserver_cache_1'), lower=False)
317
318 273 def _configure(self):
319 274 self.config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory)
320 275
321 276 self.config.add_route('service', '/_service')
322 277 self.config.add_route('status', '/status')
323 278 self.config.add_route('hg_proxy', '/proxy/hg')
324 279 self.config.add_route('git_proxy', '/proxy/git')
325 280
326 281 # rpc methods
327 282 self.config.add_route('vcs', '/{backend}')
328 283
329 284 # streaming rpc remote methods
330 285 self.config.add_route('vcs_stream', '/{backend}/stream')
331 286
332 287 # vcs operations clone/push as streaming
333 288 self.config.add_route('stream_git', '/stream/git/*repo_name')
334 289 self.config.add_route('stream_hg', '/stream/hg/*repo_name')
335 290
336 291 self.config.add_view(self.status_view, route_name='status', renderer='json')
337 292 self.config.add_view(self.service_view, route_name='service', renderer='msgpack')
338 293
339 294 self.config.add_view(self.hg_proxy(), route_name='hg_proxy')
340 295 self.config.add_view(self.git_proxy(), route_name='git_proxy')
341 296 self.config.add_view(self.vcs_view, route_name='vcs', renderer='msgpack',
342 297 vcs_view=self._remotes)
343 298 self.config.add_view(self.vcs_stream_view, route_name='vcs_stream',
344 299 vcs_view=self._remotes)
345 300
346 301 self.config.add_view(self.hg_stream(), route_name='stream_hg')
347 302 self.config.add_view(self.git_stream(), route_name='stream_git')
348 303
349 304 self.config.add_view_predicate('vcs_view', VCSViewPredicate)
350 305
351 306 self.config.add_notfound_view(not_found, renderer='json')
352 307
353 308 self.config.add_view(self.handle_vcs_exception, context=Exception)
354 309
355 310 self.config.add_tween(
356 311 'vcsserver.tweens.request_wrapper.RequestWrapperTween',
357 312 )
358 313 self.config.add_request_method(
359 314 'vcsserver.lib.request_counter.get_request_counter',
360 315 'request_count')
361 316
362 self.config.add_request_method(
363 'vcsserver.lib._vendor.statsd.get_statsd_client',
364 'statsd', reify=True)
365
366 317 def wsgi_app(self):
367 318 return self.config.make_wsgi_app()
368 319
369 320 def _vcs_view_params(self, request):
370 321 remote = self._remotes[request.matchdict['backend']]
371 322 payload = msgpack.unpackb(request.body, use_list=True)
323
372 324 method = payload.get('method')
373 325 params = payload['params']
374 326 wire = params.get('wire')
375 327 args = params.get('args')
376 328 kwargs = params.get('kwargs')
377 329 context_uid = None
378 330
331 request.registry.vcs_call_context = {
332 'method': method,
333 'repo_name': payload.get('_repo_name'),
334 }
335
379 336 if wire:
380 337 try:
381 338 wire['context'] = context_uid = uuid.UUID(wire['context'])
382 339 except KeyError:
383 340 pass
384 341 args.insert(0, wire)
385 342 repo_state_uid = wire.get('repo_state_uid') if wire else None
386 343
387 344 # NOTE(marcink): trading complexity for slight performance
388 345 if log.isEnabledFor(logging.DEBUG):
389 no_args_methods = [
390
391 ]
392 if method in no_args_methods:
346 # also we SKIP printing out any of those methods' args since they may be excessive
347 just_args_methods = {
348 'commitctx': ('content', 'removed', 'updated'),
349 'commit': ('content', 'removed', 'updated')
350 }
351 if method in just_args_methods:
352 skip_args = just_args_methods[method]
393 353 call_args = ''
354 call_kwargs = {}
355 for k in kwargs:
356 if k in skip_args:
357 # replace our skipped key with a dummy value
358 call_kwargs[k] = f'RemovedParam({k})'
359 else:
360 call_kwargs[k] = kwargs[k]
394 361 else:
395 362 call_args = args[1:]
363 call_kwargs = kwargs
396 364
397 365 log.debug('Method requested:`%s` with args:%s kwargs:%s context_uid: %s, repo_state_uid:%s',
398 method, call_args, kwargs, context_uid, repo_state_uid)
366 method, call_args, call_kwargs, context_uid, repo_state_uid)
399 367
368 statsd = request.registry.statsd
369 if statsd:
370 statsd.incr(
371 'vcsserver_method_total', tags=[
372 f"method:{method}",
373 ])
400 374 return payload, remote, method, args, kwargs
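# Illustrative shape of the msgpack payload this method unpacks (field
# values are hypothetical):
#
#   {'id': 'req-1', 'method': 'branches', '_repo_name': 'my-repo',
#    'params': {'wire': {'context': '<uuid>', 'repo_state_uid': '...'},
#               'args': [], 'kwargs': {}}}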
401 375
402 376 def vcs_view(self, request):
403 377
404 378 payload, remote, method, args, kwargs = self._vcs_view_params(request)
405 379 payload_id = payload.get('id')
406 380
407 381 try:
408 382 resp = getattr(remote, method)(*args, **kwargs)
409 383 except Exception as e:
410 384 exc_info = list(sys.exc_info())
411 385 exc_type, exc_value, exc_traceback = exc_info
412 386
413 387 org_exc = getattr(e, '_org_exc', None)
414 388 org_exc_name = None
415 389 org_exc_tb = ''
416 390 if org_exc:
417 391 org_exc_name = org_exc.__class__.__name__
418 392 org_exc_tb = getattr(e, '_org_exc_tb', '')
419 393 # replace our "faked" exception with our org
420 394 exc_info[0] = org_exc.__class__
421 395 exc_info[1] = org_exc
422 396
423 397 should_store_exc = True
424 398 if org_exc:
425 399 def get_exc_fqn(_exc_obj):
426 400 module_name = getattr(org_exc.__class__, '__module__', 'UNKNOWN')
427 401 return module_name + '.' + org_exc_name
428 402
429 403 exc_fqn = get_exc_fqn(org_exc)
430 404
431 405 if exc_fqn in ['mercurial.error.RepoLookupError',
432 406 'vcsserver.exceptions.RefNotFoundException']:
433 407 should_store_exc = False
434 408
435 409 if should_store_exc:
436 410 store_exception(id(exc_info), exc_info, request_path=request.path)
437 411
438 tb_info = ''.join(
439 traceback.format_exception(exc_type, exc_value, exc_traceback))
412 tb_info = format_exc(exc_info)
440 413
441 414 type_ = e.__class__.__name__
442 415 if type_ not in self.ALLOWED_EXCEPTIONS:
443 416 type_ = None
444 417
445 418 resp = {
446 419 'id': payload_id,
447 420 'error': {
448 'message': e.message,
421 'message': str(e),
449 422 'traceback': tb_info,
450 423 'org_exc': org_exc_name,
451 424 'org_exc_tb': org_exc_tb,
452 425 'type': type_
453 426 }
454 427 }
455 428
456 429 try:
457 430 resp['error']['_vcs_kind'] = getattr(e, '_vcs_kind', None)
458 431 except AttributeError:
459 432 pass
460 433 else:
461 434 resp = {
462 435 'id': payload_id,
463 436 'result': resp
464 437 }
465
438 log.debug('Serving data for method %s', method)
466 439 return resp
467 440
468 441 def vcs_stream_view(self, request):
469 442 payload, remote, method, args, kwargs = self._vcs_view_params(request)
470 443 # this method has a 'stream:' marker; we remove it here
471 444 method = method.split('stream:')[-1]
472 445 chunk_size = safe_int(payload.get('chunk_size')) or 4096
473 446
474 try:
475 resp = getattr(remote, method)(*args, **kwargs)
476 except Exception as e:
477 raise
447 resp = getattr(remote, method)(*args, **kwargs)
478 448
479 449 def get_chunked_data(method_resp):
480 stream = StringIO(method_resp)
450 stream = io.BytesIO(method_resp)
481 451 while 1:
482 452 chunk = stream.read(chunk_size)
483 453 if not chunk:
484 454 break
485 455 yield chunk
486 456
487 457 response = Response(app_iter=get_chunked_data(resp))
488 458 response.content_type = 'application/octet-stream'
489 459
490 460 return response
491 461
492 462 def status_view(self, request):
493 463 import vcsserver
494 return {'status': 'OK', 'vcsserver_version': vcsserver.__version__,
495 'pid': os.getpid()}
464 _platform_id = platform.uname()[1] or 'instance'
465
466 return {
467 "status": "OK",
468 "vcsserver_version": vcsserver.get_version(),
469 "platform": _platform_id,
470 "pid": os.getpid(),
471 }
496 472
497 473 def service_view(self, request):
498 474 import vcsserver
499 475
500 476 payload = msgpack.unpackb(request.body, use_list=True)
501 477 server_config, app_config = {}, {}
502 478
503 479 try:
504 480 path = self.global_config['__file__']
505 481 config = configparser.RawConfigParser()
506 482
507 483 config.read(path)
508 484
509 485 if config.has_section('server:main'):
510 486 server_config = dict(config.items('server:main'))
511 487 if config.has_section('app:main'):
512 488 app_config = dict(config.items('app:main'))
513 489
514 490 except Exception:
515 491 log.exception('Failed to read .ini file for display')
516 492
517 environ = os.environ.items()
493 environ = list(os.environ.items())
518 494
519 495 resp = {
520 496 'id': payload.get('id'),
521 497 'result': dict(
522 version=vcsserver.__version__,
498 version=vcsserver.get_version(),
523 499 config=server_config,
524 500 app_config=app_config,
525 501 environ=environ,
526 502 payload=payload,
527 503 )
528 504 }
529 505 return resp
530 506
531 507 def _msgpack_renderer_factory(self, info):
508
532 509 def _render(value, system):
510 bin_type = False
511 res = value.get('result')
512 if isinstance(res, BytesEnvelope):
513 log.debug('Result is wrapped in BytesEnvelope type')
514 bin_type = True
515 elif isinstance(res, BinaryEnvelope):
516 log.debug('Result is wrapped in BinaryEnvelope type')
517 value['result'] = res.val
518 bin_type = True
519
533 520 request = system.get('request')
534 521 if request is not None:
535 522 response = request.response
536 523 ct = response.content_type
537 524 if ct == response.default_content_type:
538 525 response.content_type = 'application/x-msgpack'
539 return msgpack.packb(value)
526 if bin_type:
527 response.content_type = 'application/x-msgpack-bin'
528
529 return msgpack.packb(value, use_bin_type=bin_type)
540 530 return _render
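# Content-type negotiation sketch: plain dict results are packed as
# standard msgpack ('application/x-msgpack'), while results wrapped in
# BytesEnvelope/BinaryEnvelope are packed with use_bin_type=True and
# served as 'application/x-msgpack-bin' so clients can tell raw-bytes
# payloads apart from text ones.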
541 531
542 532 def set_env_from_config(self, environ, config):
543 533 dict_conf = {}
544 534 try:
545 535 for elem in config:
546 536 if elem[0] == 'rhodecode':
547 537 dict_conf = json.loads(elem[2])
548 538 break
549 539 except Exception:
550 540 log.exception('Failed to fetch SCM CONFIG')
551 541 return
552 542
553 543 username = dict_conf.get('username')
554 544 if username:
555 545 environ['REMOTE_USER'] = username
556 546 # mercurial specific, some extension api rely on this
557 547 environ['HGUSER'] = username
558 548
559 549 ip = dict_conf.get('ip')
560 550 if ip:
561 551 environ['REMOTE_HOST'] = ip
562 552
563 553 if _is_request_chunked(environ):
564 554 # set the compatibility flag for webob
565 555 environ['wsgi.input_terminated'] = True
566 556
567 557 def hg_proxy(self):
568 558 @wsgiapp
569 559 def _hg_proxy(environ, start_response):
570 560 app = WsgiProxy(self.remote_wsgi.HgRemoteWsgi())
571 561 return app(environ, start_response)
572 562 return _hg_proxy
573 563
574 564 def git_proxy(self):
575 565 @wsgiapp
576 566 def _git_proxy(environ, start_response):
577 567 app = WsgiProxy(self.remote_wsgi.GitRemoteWsgi())
578 568 return app(environ, start_response)
579 569 return _git_proxy
580 570
581 571 def hg_stream(self):
582 572 if self._use_echo_app:
583 573 @wsgiapp
584 574 def _hg_stream(environ, start_response):
585 575 app = EchoApp('fake_path', 'fake_name', None)
586 576 return app(environ, start_response)
587 577 return _hg_stream
588 578 else:
589 579 @wsgiapp
590 580 def _hg_stream(environ, start_response):
591 581 log.debug('http-app: handling hg stream')
592 repo_path = environ['HTTP_X_RC_REPO_PATH']
593 repo_name = environ['HTTP_X_RC_REPO_NAME']
594 packed_config = base64.b64decode(
595 environ['HTTP_X_RC_REPO_CONFIG'])
596 config = msgpack.unpackb(packed_config)
582 call_context = get_headers_call_context(environ)
583
584 repo_path = call_context['repo_path']
585 repo_name = call_context['repo_name']
586 config = call_context['repo_config']
587
597 588 app = scm_app.create_hg_wsgi_app(
598 589 repo_path, repo_name, config)
599 590
600 591 # Consistent path information for hgweb
601 environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO']
592 environ['PATH_INFO'] = call_context['path_info']
602 593 environ['REPO_NAME'] = repo_name
603 594 self.set_env_from_config(environ, config)
604 595
605 596 log.debug('http-app: starting app handler '
606 597 'with %s and processing the request', app)
607 598 return app(environ, ResponseFilter(start_response))
608 599 return _hg_stream
609 600
610 601 def git_stream(self):
611 602 if self._use_echo_app:
612 603 @wsgiapp
613 604 def _git_stream(environ, start_response):
614 605 app = EchoApp('fake_path', 'fake_name', None)
615 606 return app(environ, start_response)
616 607 return _git_stream
617 608 else:
618 609 @wsgiapp
619 610 def _git_stream(environ, start_response):
620 611 log.debug('http-app: handling git stream')
621 repo_path = environ['HTTP_X_RC_REPO_PATH']
622 repo_name = environ['HTTP_X_RC_REPO_NAME']
623 packed_config = base64.b64decode(
624 environ['HTTP_X_RC_REPO_CONFIG'])
625 config = msgpack.unpackb(packed_config)
612
613 call_context = get_headers_call_context(environ)
626 614
627 environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO']
615 repo_path = call_context['repo_path']
616 repo_name = call_context['repo_name']
617 config = call_context['repo_config']
618
619 environ['PATH_INFO'] = call_context['path_info']
628 620 self.set_env_from_config(environ, config)
629 621
630 622 content_type = environ.get('CONTENT_TYPE', '')
631 623
632 624 path = environ['PATH_INFO']
633 625 is_lfs_request = GIT_LFS_CONTENT_TYPE in content_type
634 626 log.debug(
635 627 'LFS: Detecting if request `%s` is LFS server path based '
636 628 'on content type:`%s`, is_lfs:%s',
637 629 path, content_type, is_lfs_request)
638 630
639 631 if not is_lfs_request:
640 632 # fallback detection by path
641 633 if GIT_LFS_PROTO_PAT.match(path):
642 634 is_lfs_request = True
643 635 log.debug(
644 636 'LFS: fallback detection by path of: `%s`, is_lfs:%s',
645 637 path, is_lfs_request)
646 638
647 639 if is_lfs_request:
648 640 app = scm_app.create_git_lfs_wsgi_app(
649 641 repo_path, repo_name, config)
650 642 else:
651 643 app = scm_app.create_git_wsgi_app(
652 644 repo_path, repo_name, config)
653 645
654 646 log.debug('http-app: starting app handler '
655 647 'with %s and processing the request', app)
656 648
657 649 return app(environ, start_response)
658 650
659 651 return _git_stream
660 652
661 653 def handle_vcs_exception(self, exception, request):
662 654 _vcs_kind = getattr(exception, '_vcs_kind', '')
655
663 656 if _vcs_kind == 'repo_locked':
664 # Get custom repo-locked status code if present.
665 status_code = request.headers.get('X-RC-Locked-Status-Code')
657 headers_call_context = get_headers_call_context(request.environ)
658 status_code = safe_int(headers_call_context['locked_status_code'])
659
666 660 return HTTPRepoLocked(
667 title=exception.message, status_code=status_code)
661 title=str(exception), status_code=status_code, headers=[('X-Rc-Locked', '1')])
668 662
669 663 elif _vcs_kind == 'repo_branch_protected':
670 664 # Get custom repo-branch-protected status code if present.
671 return HTTPRepoBranchProtected(title=exception.message)
665 return HTTPRepoBranchProtected(
666 title=str(exception), headers=[('X-Rc-Branch-Protection', '1')])
672 667
673 668 exc_info = request.exc_info
674 669 store_exception(id(exc_info), exc_info)
675 670
676 671 traceback_info = 'unavailable'
677 672 if request.exc_info:
678 exc_type, exc_value, exc_tb = request.exc_info
679 traceback_info = ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))
673 traceback_info = format_exc(request.exc_info)
680 674
681 675 log.error(
682 'error occurred handling this request for path: %s, \n tb: %s',
676 'error occurred handling this request for path: %s, \n%s',
683 677 request.path, traceback_info)
678
679 statsd = request.registry.statsd
680 if statsd:
681 exc_type = f"{exception.__class__.__module__}.{exception.__class__.__name__}"
682 statsd.incr('vcsserver_exception_total',
683 tags=[f"type:{exc_type}"])
684 684 raise exception
685 685
686 686
687 class ResponseFilter(object):
687 class ResponseFilter:
688 688
689 689 def __init__(self, start_response):
690 690 self._start_response = start_response
691 691
692 692 def __call__(self, status, response_headers, exc_info=None):
693 693 headers = tuple(
694 694 (h, v) for h, v in response_headers
695 695 if not wsgiref.util.is_hop_by_hop(h))
696 696 return self._start_response(status, headers, exc_info)
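# ResponseFilter drops hop-by-hop headers (Connection, Keep-Alive, etc.)
# before delegating to the real start_response, since PEP 3333 forbids
# WSGI applications from setting them.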
697 697
698 698
699 def sanitize_settings_and_apply_defaults(global_config, settings):
700 _global_settings_maker = SettingsMaker(global_config)
701 settings_maker = SettingsMaker(settings)
702
703 settings_maker.make_setting('logging.autoconfigure', False, parser='bool')
704
705 logging_conf = os.path.join(os.path.dirname(global_config.get('__file__')), 'logging.ini')
706 settings_maker.enable_logging(logging_conf)
707
708 # Default includes, possible to change as a user
709 pyramid_includes = settings_maker.make_setting('pyramid.includes', [], parser='list:newline')
710 log.debug("Using the following pyramid.includes: %s", pyramid_includes)
711
712 settings_maker.make_setting('__file__', global_config.get('__file__'))
713
714 settings_maker.make_setting('pyramid.default_locale_name', 'en')
715 settings_maker.make_setting('locale', 'en_US.UTF-8')
716
717 settings_maker.make_setting('core.binary_dir', '')
718
719 temp_store = tempfile.gettempdir()
720 default_cache_dir = os.path.join(temp_store, 'rc_cache')
721 # save default cache dir and use it for all backends later.
722 default_cache_dir = settings_maker.make_setting(
723 'cache_dir',
724 default=default_cache_dir, default_when_empty=True,
725 parser='dir:ensured')
726
727 # exception store cache
728 settings_maker.make_setting(
729 'exception_tracker.store_path',
730 default=os.path.join(default_cache_dir, 'exc_store'), default_when_empty=True,
731 parser='dir:ensured'
732 )
733
734 # repo_object cache defaults
735 settings_maker.make_setting(
736 'rc_cache.repo_object.backend',
737 default='dogpile.cache.rc.file_namespace',
738 parser='string')
739 settings_maker.make_setting(
740 'rc_cache.repo_object.expiration_time',
741 default=30 * 24 * 60 * 60, # 30days
742 parser='int')
743 settings_maker.make_setting(
744 'rc_cache.repo_object.arguments.filename',
745 default=os.path.join(default_cache_dir, 'vcsserver_cache_repo_object.db'),
746 parser='string')
747
748 # statsd
749 settings_maker.make_setting('statsd.enabled', False, parser='bool')
750 settings_maker.make_setting('statsd.statsd_host', 'statsd-exporter', parser='string')
751 settings_maker.make_setting('statsd.statsd_port', 9125, parser='int')
752 settings_maker.make_setting('statsd.statsd_prefix', '')
753 settings_maker.make_setting('statsd.statsd_ipv6', False, parser='bool')
754
755 settings_maker.env_expand()
756
757
699 758 def main(global_config, **settings):
759 start_time = time.time()
760 log.info('Pyramid app config starting')
761
700 762 if MercurialFactory:
701 763 hgpatches.patch_largefiles_capabilities()
702 764 hgpatches.patch_subrepo_type_mapping()
703 765
704 app = HTTPApplication(settings=settings, global_config=global_config)
705 return app.wsgi_app()
766 # Fill in and sanitize the defaults & do ENV expansion
767 sanitize_settings_and_apply_defaults(global_config, settings)
768
769 # init and bootstrap StatsdClient
770 StatsdClient.setup(settings)
771
772 pyramid_app = HTTPApplication(settings=settings, global_config=global_config).wsgi_app()
773 total_time = time.time() - start_time
774 log.info('Pyramid app created and configured in %.2fs', total_time)
775 return pyramid_app
@@ -1,16 +1,16 @@
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -1,26 +1,26 @@
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 # This package contains non rhodecode licensed packages that are
19 19 # vendored for various reasons
20 20
21 21 import os
22 22 import sys
23 23
24 24 vendor_dir = os.path.abspath(os.path.dirname(__file__))
25 25
26 26 sys.path.append(vendor_dir)
@@ -1,390 +1,394 b''
1 import sys
1
2 2 import threading
3 3 import weakref
4 4 from base64 import b64encode
5 5 from logging import getLogger
6 6 from os import urandom
7 from typing import Union
7 8
8 9 from redis import StrictRedis
9 10
10 __version__ = '3.7.0'
11 __version__ = '4.0.0'
11 12
12 13 loggers = {
13 14 k: getLogger("vcsserver." + ".".join((__name__, k)))
14 15 for k in [
15 16 "acquire",
16 17 "refresh.thread.start",
17 18 "refresh.thread.stop",
18 19 "refresh.thread.exit",
19 20 "refresh.start",
20 21 "refresh.shutdown",
21 22 "refresh.exit",
22 23 "release",
23 24 ]
24 25 }
25 26
26 PY3 = sys.version_info[0] == 3
27
28 if PY3:
29 text_type = str
30 binary_type = bytes
31 else:
32 text_type = unicode # noqa
33 binary_type = str
27 text_type = str
28 binary_type = bytes
34 29
35 30
36 31 # Check if the id matches. If not, return an error code.
37 32 UNLOCK_SCRIPT = b"""
38 33 if redis.call("get", KEYS[1]) ~= ARGV[1] then
39 34 return 1
40 35 else
41 36 redis.call("del", KEYS[2])
42 37 redis.call("lpush", KEYS[2], 1)
43 38 redis.call("pexpire", KEYS[2], ARGV[2])
44 39 redis.call("del", KEYS[1])
45 40 return 0
46 41 end
47 42 """
48 43
49 44 # Covers both cases: the key doesn't exist, or it doesn't equal the lock's id
50 45 EXTEND_SCRIPT = b"""
51 46 if redis.call("get", KEYS[1]) ~= ARGV[1] then
52 47 return 1
53 48 elseif redis.call("ttl", KEYS[1]) < 0 then
54 49 return 2
55 50 else
56 51 redis.call("expire", KEYS[1], ARGV[2])
57 52 return 0
58 53 end
59 54 """
60 55
61 56 RESET_SCRIPT = b"""
62 57 redis.call('del', KEYS[2])
63 58 redis.call('lpush', KEYS[2], 1)
64 59 redis.call('pexpire', KEYS[2], ARGV[2])
65 60 return redis.call('del', KEYS[1])
66 61 """
67 62
68 63 RESET_ALL_SCRIPT = b"""
69 64 local locks = redis.call('keys', 'lock:*')
70 65 local signal
71 66 for _, lock in pairs(locks) do
72 67 signal = 'lock-signal:' .. string.sub(lock, 6)
73 68 redis.call('del', signal)
74 69 redis.call('lpush', signal, 1)
75 70 redis.call('expire', signal, 1)
76 71 redis.call('del', lock)
77 72 end
78 73 return #locks
79 74 """
80 75
81 76
82 77 class AlreadyAcquired(RuntimeError):
83 78 pass
84 79
85 80
86 81 class NotAcquired(RuntimeError):
87 82 pass
88 83
89 84
90 85 class AlreadyStarted(RuntimeError):
91 86 pass
92 87
93 88
94 89 class TimeoutNotUsable(RuntimeError):
95 90 pass
96 91
97 92
98 93 class InvalidTimeout(RuntimeError):
99 94 pass
100 95
101 96
102 97 class TimeoutTooLarge(RuntimeError):
103 98 pass
104 99
105 100
106 101 class NotExpirable(RuntimeError):
107 102 pass
108 103
109 104
110 class Lock(object):
105 class Lock:
111 106 """
112 107 A Lock context manager implemented via redis SETNX/BLPOP.
113 108 """
109
114 110 unlock_script = None
115 111 extend_script = None
116 112 reset_script = None
117 113 reset_all_script = None
118 114
115 _lock_renewal_interval: Union[float, None]
116 _lock_renewal_thread: Union[threading.Thread, None]
117
119 118 def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000):
120 119 """
121 120 :param redis_client:
122 121 An instance of :class:`~StrictRedis`.
123 122 :param name:
124 123 The name (redis key) the lock should have.
125 124 :param expire:
126 125 The lock expiry time in seconds. If left at the default (None)
127 126 the lock will not expire.
128 127 :param id:
129 128 The ID (redis value) the lock should have. A random value is
130 129 generated when left at the default.
131 130
132 131 Note that if you specify this then the lock is marked as "held". Acquires
133 132 won't be possible.
134 133 :param auto_renewal:
135 134 If set to ``True``, Lock will automatically renew the lock so that it
136 135 doesn't expire for as long as the lock is held (acquire() called
137 136 or running in a context manager).
138 137
139 138 Implementation note: Renewal will happen using a daemon thread with
140 139 an interval of ``expire*2/3``. If wishing to use a different renewal
141 140 time, subclass Lock, call ``super().__init__()`` then set
142 141 ``self._lock_renewal_interval`` to your desired interval.
143 142 :param strict:
144 143 If set ``True`` then the ``redis_client`` needs to be an instance of ``redis.StrictRedis``.
145 144 :param signal_expire:
146 145 Advanced option to override signal list expiration in milliseconds. Increase it for very slow clients. Default: ``1000``.
147 146 """
148 147 if strict and not isinstance(redis_client, StrictRedis):
149 148 raise ValueError("redis_client must be instance of StrictRedis. "
150 149 "Use strict=False if you know what you're doing.")
151 150 if auto_renewal and expire is None:
152 151 raise ValueError("Expire may not be None when auto_renewal is set")
153 152
154 153 self._client = redis_client
155 154
156 155 if expire:
157 156 expire = int(expire)
158 157 if expire < 0:
159 158 raise ValueError("A negative expire is not acceptable.")
160 159 else:
161 160 expire = None
162 161 self._expire = expire
163 162
164 163 self._signal_expire = signal_expire
165 164 if id is None:
166 165 self._id = b64encode(urandom(18)).decode('ascii')
167 166 elif isinstance(id, binary_type):
168 167 try:
169 168 self._id = id.decode('ascii')
170 169 except UnicodeDecodeError:
171 170 self._id = b64encode(id).decode('ascii')
172 171 elif isinstance(id, text_type):
173 172 self._id = id
174 173 else:
175 raise TypeError("Incorrect type for `id`. Must be bytes/str not %s." % type(id))
174 raise TypeError(f"Incorrect type for `id`. Must be bytes/str not {type(id)}.")
176 175 self._name = 'lock:' + name
177 176 self._signal = 'lock-signal:' + name
178 177 self._lock_renewal_interval = (float(expire) * 2 / 3
179 178 if auto_renewal
180 179 else None)
181 180 self._lock_renewal_thread = None
182 181
183 182 self.register_scripts(redis_client)
184 183
185 184 @classmethod
186 185 def register_scripts(cls, redis_client):
187 186 global reset_all_script
188 187 if reset_all_script is None:
189 reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
190 188 cls.unlock_script = redis_client.register_script(UNLOCK_SCRIPT)
191 189 cls.extend_script = redis_client.register_script(EXTEND_SCRIPT)
192 190 cls.reset_script = redis_client.register_script(RESET_SCRIPT)
193 191 cls.reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
192 reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
194 193
195 194 @property
196 195 def _held(self):
197 196 return self.id == self.get_owner_id()
198 197
199 198 def reset(self):
200 199 """
201 200 Forcibly deletes the lock. Use this with care.
202 201 """
203 202 self.reset_script(client=self._client, keys=(self._name, self._signal), args=(self.id, self._signal_expire))
204 203
205 204 @property
206 205 def id(self):
207 206 return self._id
208 207
209 208 def get_owner_id(self):
210 209 owner_id = self._client.get(self._name)
211 210 if isinstance(owner_id, binary_type):
212 211 owner_id = owner_id.decode('ascii', 'replace')
213 212 return owner_id
214 213
215 214 def acquire(self, blocking=True, timeout=None):
216 215 """
217 216 :param blocking:
218 217 Boolean value specifying whether lock should be blocking or not.
219 218 :param timeout:
220 219 An integer value specifying the maximum number of seconds to block.
221 220 """
222 221 logger = loggers["acquire"]
223 222
224 logger.debug("Getting acquire on %r ...", self._name)
223 logger.debug("Getting blocking: %s acquire on %r ...", blocking, self._name)
225 224
226 225 if self._held:
227 226 owner_id = self.get_owner_id()
228 227 raise AlreadyAcquired("Already acquired from this Lock instance. Lock id: {}".format(owner_id))
229 228
230 229 if not blocking and timeout is not None:
231 230 raise TimeoutNotUsable("Timeout cannot be used if blocking=False")
232 231
233 232 if timeout:
234 233 timeout = int(timeout)
235 234 if timeout < 0:
236 raise InvalidTimeout("Timeout (%d) cannot be less than or equal to 0" % timeout)
235 raise InvalidTimeout(f"Timeout ({timeout}) cannot be less than or equal to 0")
237 236
238 237 if self._expire and not self._lock_renewal_interval and timeout > self._expire:
239 raise TimeoutTooLarge("Timeout (%d) cannot be greater than expire (%d)" % (timeout, self._expire))
238 raise TimeoutTooLarge(f"Timeout ({timeout}) cannot be greater than expire ({self._expire})")
240 239
241 240 busy = True
242 241 blpop_timeout = timeout or self._expire or 0
243 242 timed_out = False
244 243 while busy:
245 244 busy = not self._client.set(self._name, self._id, nx=True, ex=self._expire)
246 245 if busy:
247 246 if timed_out:
248 247 return False
249 248 elif blocking:
250 249 timed_out = not self._client.blpop(self._signal, blpop_timeout) and timeout
251 250 else:
252 logger.warning("Failed to get %r.", self._name)
251 logger.warning("Failed to acquire Lock(%r).", self._name)
253 252 return False
254 253
255 logger.info("Got lock for %r.", self._name)
254 logger.debug("Acquired Lock(%r).", self._name)
256 255 if self._lock_renewal_interval is not None:
257 256 self._start_lock_renewer()
258 257 return True
259 258
260 259 def extend(self, expire=None):
261 """Extends expiration time of the lock.
260 """
261 Extends expiration time of the lock.
262 262
263 263 :param expire:
264 264 New expiration time. If ``None`` - `expire` provided during
265 265 lock initialization will be taken.
266 266 """
267 267 if expire:
268 268 expire = int(expire)
269 269 if expire < 0:
270 270 raise ValueError("A negative expire is not acceptable.")
271 271 elif self._expire is not None:
272 272 expire = self._expire
273 273 else:
274 274 raise TypeError(
275 275 "To extend a lock 'expire' must be provided as an "
276 276 "argument to extend() method or at initialization time."
277 277 )
278 278
279 279 error = self.extend_script(client=self._client, keys=(self._name, self._signal), args=(self._id, expire))
280 280 if error == 1:
281 raise NotAcquired("Lock %s is not acquired or it already expired." % self._name)
281 raise NotAcquired(f"Lock {self._name} is not acquired or it already expired.")
282 282 elif error == 2:
283 raise NotExpirable("Lock %s has no assigned expiration time" % self._name)
283 raise NotExpirable(f"Lock {self._name} has no assigned expiration time")
284 284 elif error:
285 raise RuntimeError("Unsupported error code %s from EXTEND script" % error)
285 raise RuntimeError(f"Unsupported error code {error} from EXTEND script")
286 286
287 287 @staticmethod
288 def _lock_renewer(lockref, interval, stop):
288 def _lock_renewer(name, lockref, interval, stop):
289 289 """
290 290 Renew the lock key in redis every `interval` seconds for as long
291 291 as `self._lock_renewal_thread.should_exit` is False.
292 292 """
293 293 while not stop.wait(timeout=interval):
294 loggers["refresh.thread.start"].debug("Refreshing lock")
295 lock = lockref()
294 loggers["refresh.thread.start"].debug("Refreshing Lock(%r).", name)
295 lock: "Lock" = lockref()
296 296 if lock is None:
297 297 loggers["refresh.thread.stop"].debug(
298 "The lock no longer exists, stopping lock refreshing"
298 "Stopping loop because Lock(%r) was garbage collected.", name
299 299 )
300 300 break
301 301 lock.extend(expire=lock._expire)
302 302 del lock
303 loggers["refresh.thread.exit"].debug("Exit requested, stopping lock refreshing")
303 loggers["refresh.thread.exit"].debug("Exiting renewal thread for Lock(%r).", name)
304 304
305 305 def _start_lock_renewer(self):
306 306 """
307 307 Starts the lock refresher thread.
308 308 """
309 309 if self._lock_renewal_thread is not None:
310 310 raise AlreadyStarted("Lock refresh thread already started")
311 311
312 312 loggers["refresh.start"].debug(
313 "Starting thread to refresh lock every %s seconds",
314 self._lock_renewal_interval
313 "Starting renewal thread for Lock(%r). Refresh interval: %s seconds.",
314 self._name, self._lock_renewal_interval
315 315 )
316 316 self._lock_renewal_stop = threading.Event()
317 317 self._lock_renewal_thread = threading.Thread(
318 318 group=None,
319 319 target=self._lock_renewer,
320 kwargs={'lockref': weakref.ref(self),
321 'interval': self._lock_renewal_interval,
322 'stop': self._lock_renewal_stop}
320 kwargs={
321 'name': self._name,
322 'lockref': weakref.ref(self),
323 'interval': self._lock_renewal_interval,
324 'stop': self._lock_renewal_stop,
325 },
323 326 )
324 self._lock_renewal_thread.setDaemon(True)
327 self._lock_renewal_thread.daemon = True
325 328 self._lock_renewal_thread.start()
326 329
327 330 def _stop_lock_renewer(self):
328 331 """
329 332 Stop the lock renewer.
330 333
331 334 This signals the renewal thread and waits for its exit.
332 335 """
333 336 if self._lock_renewal_thread is None or not self._lock_renewal_thread.is_alive():
334 337 return
335 loggers["refresh.shutdown"].debug("Signalling the lock refresher to stop")
338 loggers["refresh.shutdown"].debug("Signaling renewal thread for Lock(%r) to exit.", self._name)
336 339 self._lock_renewal_stop.set()
337 340 self._lock_renewal_thread.join()
338 341 self._lock_renewal_thread = None
339 loggers["refresh.exit"].debug("Lock refresher has stopped")
342 loggers["refresh.exit"].debug("Renewal thread for Lock(%r) exited.", self._name)
340 343
341 344 def __enter__(self):
342 345 acquired = self.acquire(blocking=True)
343 assert acquired, "Lock wasn't acquired, but blocking=True"
346 if not acquired:
347 raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
344 348 return self
345 349
346 350 def __exit__(self, exc_type=None, exc_value=None, traceback=None):
347 351 self.release()
348 352
349 353 def release(self):
350 354 """Releases the lock, that was acquired with the same object.
351 355
352 356 .. note::
353 357
354 358 If you want to release a lock that you acquired in a different place you have two choices:
355 359
356 360 * Use ``Lock("name", id=id_from_other_place).release()``
357 361 * Use ``Lock("name").reset()``
358 362 """
359 363 if self._lock_renewal_thread is not None:
360 364 self._stop_lock_renewer()
361 loggers["release"].debug("Releasing %r.", self._name)
365 loggers["release"].debug("Releasing Lock(%r).", self._name)
362 366 error = self.unlock_script(client=self._client, keys=(self._name, self._signal), args=(self._id, self._signal_expire))
363 367 if error == 1:
364 raise NotAcquired("Lock %s is not acquired or it already expired." % self._name)
368 raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.")
365 369 elif error:
366 raise RuntimeError("Unsupported error code %s from EXTEND script." % error)
370 raise RuntimeError(f"Unsupported error code {error} from EXTEND script.")
367 371
368 372 def locked(self):
369 373 """
370 374 Return true if the lock is acquired.
371 375
372 376 Checks that lock with same name already exists. This method returns true, even if
373 377 lock have another id.
374 378 """
375 379 return self._client.exists(self._name) == 1
376 380
377 381
378 382 reset_all_script = None
379 383
380 384
381 385 def reset_all(redis_client):
382 386 """
383 387 Forcibly deletes all leftover locks (e.g. after a crash). Use this with care.
384 388
385 389 :param redis_client:
386 390 An instance of :class:`~StrictRedis`.
387 391 """
388 392 Lock.register_scripts(redis_client)
389 393
390 394 reset_all_script(client=redis_client) # noqa
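
For orientation, a typical use of this vendored lock, assuming a Redis server reachable on localhost (the host and lock name below are illustrative):

    from redis import StrictRedis

    from vcsserver.lib._vendor import redis_lock

    client = StrictRedis(host='localhost', port=6379)  # reachable Redis assumed
    # auto_renewal starts the daemon thread above, refreshing every expire * 2/3 seconds
    with redis_lock.Lock(client, 'example-repo-lock', expire=60, auto_renewal=True):
        pass  # critical section; the lock is released (and renewal stopped) on exit
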
@@ -1,46 +1,50 b''
1 from __future__ import absolute_import, division, unicode_literals
2
3 1 import logging
4 2
5 3 from .stream import TCPStatsClient, UnixSocketStatsClient # noqa
6 4 from .udp import StatsClient # noqa
7 5
8 6 HOST = 'localhost'
9 7 PORT = 8125
10 8 IPV6 = False
11 9 PREFIX = None
12 10 MAXUDPSIZE = 512
13 11
14 12 log = logging.getLogger('rhodecode.statsd')
15 13
16 14
17 15 def statsd_config(config, prefix='statsd.'):
18 16 _config = {}
19 17 for key in config.keys():
20 18 if key.startswith(prefix):
21 19 _config[key[len(prefix):]] = config[key]
22 20 return _config
23 21
24 22
25 23 def client_from_config(configuration, prefix='statsd.', **kwargs):
26 24 from pyramid.settings import asbool
27 25
28 26 _config = statsd_config(configuration, prefix)
29 27 statsd_enabled = asbool(_config.pop('enabled', False))
30 28 if not statsd_enabled:
31 29 log.debug('statsd client not enabled via the statsd.enabled flag, skipping...')
32 30 return
33 31
34 32 host = _config.pop('statsd_host', HOST)
35 33 port = _config.pop('statsd_port', PORT)
36 34 prefix = _config.pop('statsd_prefix', PREFIX)
37 35 maxudpsize = _config.pop('statsd_maxudpsize', MAXUDPSIZE)
38 36 ipv6 = asbool(_config.pop('statsd_ipv6', IPV6))
39 37 log.debug('configured statsd client %s:%s', host, port)
40 38
41 return StatsClient(
42 host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6)
39 try:
40 client = StatsClient(
41 host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6)
42 except Exception:
43 log.exception('StatsD is enabled, but connecting to the statsd server failed; falling back to disabled statsd')
44 client = None
45
46 return client
43 47
44 48
45 49 def get_statsd_client(request):
46 50 return client_from_config(request.registry.settings)
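
A hedged example of driving `client_from_config()` by hand with a plain dict standing in for the pyramid registry settings; the key names mirror the statsd.* defaults registered in http_main earlier in this diff, while the values are illustrative:

    config = {
        'statsd.enabled': 'true',
        'statsd.statsd_host': '127.0.0.1',
        'statsd.statsd_port': 8125,
        'statsd.statsd_prefix': 'vcsserver',
    }
    client = client_from_config(config)  # None when disabled or the connect fails
    if client:
        client.incr('app.started')
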
@@ -1,107 +1,154 b''
1 from __future__ import absolute_import, division, unicode_literals
2
1 import re
3 2 import random
4 3 from collections import deque
5 4 from datetime import timedelta
5 from repoze.lru import lru_cache
6 6
7 7 from .timer import Timer
8 8
9 TAG_INVALID_CHARS_RE = re.compile(
10 r"[^\w\d_\-:/\.]",
11 #re.UNICODE
12 )
13 TAG_INVALID_CHARS_SUBS = "_"
9 14
10 class StatsClientBase(object):
15 # we save and expose methods called by statsd for discovery
16 buckets_dict = {
17
18 }
19
20
21 @lru_cache(maxsize=500)
22 def _normalize_tags_with_cache(tag_list):
23 return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list]
24
25
26 def normalize_tags(tag_list):
27 # We have to turn our input tag list into a non-mutable tuple for it to
28 # be hashable (and thus usable) by the @lru_cache decorator.
29 return _normalize_tags_with_cache(tuple(tag_list))
30
31
32 class StatsClientBase:
11 33 """A Base class for various statsd clients."""
12 34
13 35 def close(self):
14 36 """Used to close and clean up any underlying resources."""
15 37 raise NotImplementedError()
16 38
17 39 def _send(self):
18 40 raise NotImplementedError()
19 41
20 42 def pipeline(self):
21 43 raise NotImplementedError()
22 44
23 def timer(self, stat, rate=1):
24 return Timer(self, stat, rate)
45 def timer(self, stat, rate=1, tags=None, auto_send=True):
46 """
47 statsd = StatsdClient.statsd
48 with statsd.timer('bucket_name', auto_send=True) as tmr:
49 # This block will be timed.
50 for i in range(0, 100000):
51 i ** 2
52 # you can access time here...
53 elapsed_ms = tmr.ms
54 """
55 return Timer(self, stat, rate, tags, auto_send=auto_send)
25 56
26 def timing(self, stat, delta, rate=1):
57 def timing(self, stat, delta, rate=1, tags=None, use_decimals=True):
27 58 """
28 59 Send new timing information.
29 60
30 61 `delta` can be either a number of milliseconds or a timedelta.
31 62 """
32 63 if isinstance(delta, timedelta):
33 64 # Convert timedelta to number of milliseconds.
34 65 delta = delta.total_seconds() * 1000.
35 self._send_stat(stat, '%0.6f|ms' % delta, rate)
36
37 def incr(self, stat, count=1, rate=1):
38 """Increment a stat by `count`."""
39 self._send_stat(stat, '%s|c' % count, rate)
66 if use_decimals:
67 fmt = '%0.6f|ms'
68 else:
69 fmt = '%s|ms'
70 self._send_stat(stat, fmt % delta, rate, tags)
40 71
41 def decr(self, stat, count=1, rate=1):
72 def incr(self, stat, count=1, rate=1, tags=None):
73 """Increment a stat by `count`."""
74 self._send_stat(stat, f'{count}|c', rate, tags)
75
76 def decr(self, stat, count=1, rate=1, tags=None):
42 77 """Decrement a stat by `count`."""
43 self.incr(stat, -count, rate)
78 self.incr(stat, -count, rate, tags)
44 79
45 def gauge(self, stat, value, rate=1, delta=False):
80 def gauge(self, stat, value, rate=1, delta=False, tags=None):
46 81 """Set a gauge value."""
47 82 if value < 0 and not delta:
48 83 if rate < 1:
49 84 if random.random() > rate:
50 85 return
51 86 with self.pipeline() as pipe:
52 87 pipe._send_stat(stat, '0|g', 1)
53 pipe._send_stat(stat, '%s|g' % value, 1)
88 pipe._send_stat(stat, f'{value}|g', 1)
54 89 else:
55 90 prefix = '+' if delta and value >= 0 else ''
56 self._send_stat(stat, '%s%s|g' % (prefix, value), rate)
91 self._send_stat(stat, f'{prefix}{value}|g', rate, tags)
57 92
58 93 def set(self, stat, value, rate=1):
59 94 """Set a set value."""
60 self._send_stat(stat, '%s|s' % value, rate)
95 self._send_stat(stat, f'{value}|s', rate)
96
97 def histogram(self, stat, value, rate=1, tags=None):
98 """Set a histogram"""
99 self._send_stat(stat, f'{value}|h', rate, tags)
61 100
62 def _send_stat(self, stat, value, rate):
63 self._after(self._prepare(stat, value, rate))
101 def _send_stat(self, stat, value, rate, tags=None):
102 self._after(self._prepare(stat, value, rate, tags))
64 103
65 def _prepare(self, stat, value, rate):
104 def _prepare(self, stat, value, rate, tags=None):
105 global buckets_dict
106 buckets_dict[stat] = 1
107
66 108 if rate < 1:
67 109 if random.random() > rate:
68 110 return
69 value = '%s|@%s' % (value, rate)
111 value = f'{value}|@{rate}'
70 112
71 113 if self._prefix:
72 stat = '%s.%s' % (self._prefix, stat)
114 stat = f'{self._prefix}.{stat}'
73 115
74 return '%s:%s' % (stat, value)
116 res = '%s:%s%s' % (
117 stat,
118 value,
119 ("|#" + ",".join(normalize_tags(tags))) if tags else "",
120 )
121 return res
75 122
76 123 def _after(self, data):
77 124 if data:
78 125 self._send(data)
79 126
80 127
81 128 class PipelineBase(StatsClientBase):
82 129
83 130 def __init__(self, client):
84 131 self._client = client
85 132 self._prefix = client._prefix
86 133 self._stats = deque()
87 134
88 135 def _send(self):
89 136 raise NotImplementedError()
90 137
91 138 def _after(self, data):
92 139 if data is not None:
93 140 self._stats.append(data)
94 141
95 142 def __enter__(self):
96 143 return self
97 144
98 145 def __exit__(self, typ, value, tb):
99 146 self.send()
100 147
101 148 def send(self):
102 149 if not self._stats:
103 150 return
104 151 self._send()
105 152
106 153 def pipeline(self):
107 154 return self.__class__(self)
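
The tag handling above replaces any character outside the allowed set with an underscore and memoizes the result per tag tuple. A standalone sketch of the same normalization, using the regex from this file:

    import re

    TAG_INVALID_CHARS_RE = re.compile(r"[^\w\d_\-:/\.]")

    def normalize_tag(tag: str) -> str:
        # tags may only contain word chars, '_', '-', ':', '/' and '.'
        return TAG_INVALID_CHARS_RE.sub("_", tag)

    assert normalize_tag("env:prod region=eu") == "env:prod_region_eu"
    # a tagged counter then serializes as 'stat:1|c|#tag1,tag2'
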
@@ -1,75 +1,73 b''
1 from __future__ import absolute_import, division, unicode_literals
2
3 1 import socket
4 2
5 3 from .base import StatsClientBase, PipelineBase
6 4
7 5
8 6 class StreamPipeline(PipelineBase):
9 7 def _send(self):
10 8 self._client._after('\n'.join(self._stats))
11 9 self._stats.clear()
12 10
13 11
14 12 class StreamClientBase(StatsClientBase):
15 13 def connect(self):
16 14 raise NotImplementedError()
17 15
18 16 def close(self):
19 17 if self._sock and hasattr(self._sock, 'close'):
20 18 self._sock.close()
21 19 self._sock = None
22 20
23 21 def reconnect(self):
24 22 self.close()
25 23 self.connect()
26 24
27 25 def pipeline(self):
28 26 return StreamPipeline(self)
29 27
30 28 def _send(self, data):
31 29 """Send data to statsd."""
32 30 if not self._sock:
33 31 self.connect()
34 32 self._do_send(data)
35 33
36 34 def _do_send(self, data):
37 35 self._sock.sendall(data.encode('ascii') + b'\n')
38 36
39 37
40 38 class TCPStatsClient(StreamClientBase):
41 39 """TCP version of StatsClient."""
42 40
43 41 def __init__(self, host='localhost', port=8125, prefix=None,
44 42 timeout=None, ipv6=False):
45 43 """Create a new client."""
46 44 self._host = host
47 45 self._port = port
48 46 self._ipv6 = ipv6
49 47 self._timeout = timeout
50 48 self._prefix = prefix
51 49 self._sock = None
52 50
53 51 def connect(self):
54 52 fam = socket.AF_INET6 if self._ipv6 else socket.AF_INET
55 53 family, _, _, _, addr = socket.getaddrinfo(
56 54 self._host, self._port, fam, socket.SOCK_STREAM)[0]
57 55 self._sock = socket.socket(family, socket.SOCK_STREAM)
58 56 self._sock.settimeout(self._timeout)
59 57 self._sock.connect(addr)
60 58
61 59
62 60 class UnixSocketStatsClient(StreamClientBase):
63 61 """Unix domain socket version of StatsClient."""
64 62
65 63 def __init__(self, socket_path, prefix=None, timeout=None):
66 64 """Create a new client."""
67 65 self._socket_path = socket_path
68 66 self._timeout = timeout
69 67 self._prefix = prefix
70 68 self._sock = None
71 69
72 70 def connect(self):
73 71 self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
74 72 self._sock.settimeout(self._timeout)
75 73 self._sock.connect(self._socket_path)
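
These stream clients connect lazily: `_send()` opens the socket on first use, and `reconnect()` tears it down and dials again. A small usage sketch; the endpoint and socket path are assumptions:

    # a statsd daemon listening on TCP 127.0.0.1:8125 is assumed
    tcp = TCPStatsClient(host='127.0.0.1', port=8125, timeout=2.0)
    tcp.incr('events.tcp')  # first send triggers connect()
    tcp.close()

    # Unix-socket variant; the socket path is illustrative
    # unix = UnixSocketStatsClient('/var/run/statsd.sock')
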
@@ -1,71 +1,66 b''
1 from __future__ import absolute_import, division, unicode_literals
2
3 1 import functools
4
5 # Use timer that's not susceptible to time of day adjustments.
6 try:
7 # perf_counter is only present on Py3.3+
8 from time import perf_counter as time_now
9 except ImportError:
10 # fall back to using time
11 from time import time as time_now
2 from time import perf_counter as time_now
12 3
13 4
14 5 def safe_wraps(wrapper, *args, **kwargs):
15 6 """Safely wraps partial functions."""
16 7 while isinstance(wrapper, functools.partial):
17 8 wrapper = wrapper.func
18 9 return functools.wraps(wrapper, *args, **kwargs)
19 10
20 11
21 class Timer(object):
12 class Timer:
22 13 """A context manager/decorator for statsd.timing()."""
23 14
24 def __init__(self, client, stat, rate=1):
15 def __init__(self, client, stat, rate=1, tags=None, use_decimals=True, auto_send=True):
25 16 self.client = client
26 17 self.stat = stat
27 18 self.rate = rate
19 self.tags = tags
28 20 self.ms = None
29 21 self._sent = False
30 22 self._start_time = None
23 self.use_decimals = use_decimals
24 self.auto_send = auto_send
31 25
32 26 def __call__(self, f):
33 27 """Thread-safe timing function decorator."""
34 28 @safe_wraps(f)
35 29 def _wrapped(*args, **kwargs):
36 30 start_time = time_now()
37 31 try:
38 32 return f(*args, **kwargs)
39 33 finally:
40 34 elapsed_time_ms = 1000.0 * (time_now() - start_time)
41 self.client.timing(self.stat, elapsed_time_ms, self.rate)
35 self.client.timing(self.stat, elapsed_time_ms, self.rate, self.tags, self.use_decimals)
36 self._sent = True
42 37 return _wrapped
43 38
44 39 def __enter__(self):
45 40 return self.start()
46 41
47 42 def __exit__(self, typ, value, tb):
48 self.stop()
43 self.stop(send=self.auto_send)
49 44
50 45 def start(self):
51 46 self.ms = None
52 47 self._sent = False
53 48 self._start_time = time_now()
54 49 return self
55 50
56 51 def stop(self, send=True):
57 52 if self._start_time is None:
58 53 raise RuntimeError('Timer has not started.')
59 54 dt = time_now() - self._start_time
60 55 self.ms = 1000.0 * dt # Convert to milliseconds.
61 56 if send:
62 57 self.send()
63 58 return self
64 59
65 60 def send(self):
66 61 if self.ms is None:
67 62 raise RuntimeError('No data recorded.')
68 63 if self._sent:
69 64 raise RuntimeError('Already sent data.')
70 65 self._sent = True
71 self.client.timing(self.stat, self.ms, self.rate)
66 self.client.timing(self.stat, self.ms, self.rate, self.tags, self.use_decimals)
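
Both calling styles the Timer supports, given any client from this package; the import path for the vendored package and the bucket names are assumptions:

    from vcsserver.lib._vendor.statsd import StatsClient  # vendored path assumed

    statsd = StatsClient()  # UDP client from udp.py, defaults to localhost:8125

    @statsd.timer('jobs.import')  # decorator form: times every call
    def import_job():
        return sum(i * i for i in range(10_000))

    import_job()

    with statsd.timer('jobs.block', auto_send=False) as tmr:
        import_job()
    tmr.send()     # with auto_send=False the timing is emitted explicitly
    print(tmr.ms)  # elapsed milliseconds
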
@@ -1,55 +1,53 b''
1 from __future__ import absolute_import, division, unicode_literals
2
3 1 import socket
4 2
5 3 from .base import StatsClientBase, PipelineBase
6 4
7 5
8 6 class Pipeline(PipelineBase):
9 7
10 8 def __init__(self, client):
11 super(Pipeline, self).__init__(client)
9 super().__init__(client)
12 10 self._maxudpsize = client._maxudpsize
13 11
14 12 def _send(self):
15 13 data = self._stats.popleft()
16 14 while self._stats:
17 15 # Use popleft to preserve the order of the stats.
18 16 stat = self._stats.popleft()
19 17 if len(stat) + len(data) + 1 >= self._maxudpsize:
20 18 self._client._after(data)
21 19 data = stat
22 20 else:
23 21 data += '\n' + stat
24 22 self._client._after(data)
25 23
26 24
27 25 class StatsClient(StatsClientBase):
28 26 """A client for statsd."""
29 27
30 28 def __init__(self, host='localhost', port=8125, prefix=None,
31 29 maxudpsize=512, ipv6=False):
32 30 """Create a new client."""
33 31 fam = socket.AF_INET6 if ipv6 else socket.AF_INET
34 32 family, _, _, _, addr = socket.getaddrinfo(
35 33 host, port, fam, socket.SOCK_DGRAM)[0]
36 34 self._addr = addr
37 35 self._sock = socket.socket(family, socket.SOCK_DGRAM)
38 36 self._prefix = prefix
39 37 self._maxudpsize = maxudpsize
40 38
41 39 def _send(self, data):
42 40 """Send data to statsd."""
43 41 try:
44 42 self._sock.sendto(data.encode('ascii'), self._addr)
45 43 except (socket.error, RuntimeError):
46 44 # No time for love, Dr. Jones!
47 45 pass
48 46
49 47 def close(self):
50 48 if self._sock and hasattr(self._sock, 'close'):
51 49 self._sock.close()
52 50 self._sock = None
53 51
54 52 def pipeline(self):
55 53 return Pipeline(self)
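
Pipelines batch stats client-side and flush them on exit in datagrams no larger than `maxudpsize`, as `_send()` above shows. A brief sketch, with a local statsd daemon assumed:

    client = StatsClient(maxudpsize=512)  # localhost:8125 assumed
    with client.pipeline() as pipe:
        pipe.incr('hook.pre')
        pipe.incr('hook.post')
        pipe.timing('hook.duration', 12.5)
    # on exit the queued stats leave in as few datagrams as fit in 512 bytes
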
@@ -1,175 +1,273 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
8 6 # the Free Software Foundation; either version 3 of the License, or
9 7 # (at your option) any later version.
10 8 #
11 9 # This program is distributed in the hope that it will be useful,
12 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 12 # GNU General Public License for more details.
15 13 #
16 14 # You should have received a copy of the GNU General Public License
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20
18 import io
21 19 import os
22 20 import time
21 import sys
23 22 import datetime
24 23 import msgpack
25 24 import logging
26 25 import traceback
27 26 import tempfile
28
29 from pyramid import compat
27 import glob
30 28
31 29 log = logging.getLogger(__name__)
32 30
33 31 # NOTE: Any changes should be synced with exc_tracking at rhodecode.lib.exc_tracking
34 32 global_prefix = 'vcsserver'
35 33 exc_store_dir_name = 'rc_exception_store_v1'
36 34
37 35
38 def exc_serialize(exc_id, tb, exc_type):
39
36 def exc_serialize(exc_id, tb, exc_type, extra_data=None):
40 37 data = {
41 'version': 'v1',
42 'exc_id': exc_id,
43 'exc_utc_date': datetime.datetime.utcnow().isoformat(),
44 'exc_timestamp': repr(time.time()),
45 'exc_message': tb,
46 'exc_type': exc_type,
38 "version": "v1",
39 "exc_id": exc_id,
40 "exc_utc_date": datetime.datetime.utcnow().isoformat(),
41 "exc_timestamp": repr(time.time()),
42 "exc_message": tb,
43 "exc_type": exc_type,
47 44 }
45 if extra_data:
46 data.update(extra_data)
48 47 return msgpack.packb(data), data
49 48
50 49
51 50 def exc_unserialize(tb):
52 51 return msgpack.unpackb(tb)
53 52
54 53
54 _exc_store = None
55
56
55 57 def get_exc_store():
56 58 """
57 59 Get and create exception store if it's not existing
58 60 """
61 global _exc_store
62
63 if _exc_store is not None:
64 # quick global cache
65 return _exc_store
66
59 67 import vcsserver as app
60 68
61 exc_store_dir = app.CONFIG.get('exception_tracker.store_path', '') or tempfile.gettempdir()
69 exc_store_dir = (
70 app.CONFIG.get("exception_tracker.store_path", "") or tempfile.gettempdir()
71 )
62 72 _exc_store_path = os.path.join(exc_store_dir, exc_store_dir_name)
63 73
64 74 _exc_store_path = os.path.abspath(_exc_store_path)
65 75 if not os.path.isdir(_exc_store_path):
66 76 os.makedirs(_exc_store_path)
67 log.debug('Initializing exceptions store at %s', _exc_store_path)
77 log.debug("Initializing exceptions store at %s", _exc_store_path)
78 _exc_store = _exc_store_path
79
68 80 return _exc_store_path
69 81
70 82
71 def _store_exception(exc_id, exc_info, prefix, request_path=''):
72 exc_type, exc_value, exc_traceback = exc_info
83 def get_detailed_tb(exc_info):
84 try:
85 from pip._vendor.rich import (
86 traceback as rich_tb,
87 scope as rich_scope,
88 console as rich_console,
89 )
90 except ImportError:
91 try:
92 from rich import (
93 traceback as rich_tb,
94 scope as rich_scope,
95 console as rich_console,
96 )
97 except ImportError:
98 return None
99
100 console = rich_console.Console(width=160, file=io.StringIO())
101
102 exc = rich_tb.Traceback.extract(*exc_info, show_locals=True)
103
104 tb_rich = rich_tb.Traceback(
105 trace=exc,
106 width=160,
107 extra_lines=3,
108 theme=None,
109 word_wrap=False,
110 show_locals=False,
111 max_frames=100,
112 )
73 113
74 tb = ''.join(traceback.format_exception(
75 exc_type, exc_value, exc_traceback, None))
114 # last_stack = exc.stacks[-1]
115 # last_frame = last_stack.frames[-1]
116 # if last_frame and last_frame.locals:
117 # console.print(
118 # rich_scope.render_scope(
119 # last_frame.locals,
120 # title=f'{last_frame.filename}:{last_frame.lineno}'))
121
122 console.print(tb_rich)
123 formatted_locals = console.file.getvalue()
124
125 return formatted_locals
126
76 127
77 detailed_tb = getattr(exc_value, '_org_exc_tb', None)
128 def get_request_metadata(request=None) -> dict:
129 request_metadata = {}
130 if not request:
131 from pyramid.threadlocal import get_current_request
132
133 request = get_current_request()
134
135 # NOTE(marcink): store request information into exc_data
136 if request:
137 request_metadata["client_address"] = getattr(request, "client_addr", "")
138 request_metadata["user_agent"] = getattr(request, "user_agent", "")
139 request_metadata["method"] = getattr(request, "method", "")
140 request_metadata["url"] = getattr(request, "url", "")
141 return request_metadata
142
143
144 def format_exc(exc_info, use_detailed_tb=True):
145 exc_type, exc_value, exc_traceback = exc_info
146 tb = "++ TRACEBACK ++\n\n"
147 tb += "".join(traceback.format_exception(exc_type, exc_value, exc_traceback, None))
148
149 detailed_tb = getattr(exc_value, "_org_exc_tb", None)
78 150
79 151 if detailed_tb:
80 if isinstance(detailed_tb, compat.string_types):
152 remote_tb = detailed_tb
153 if isinstance(detailed_tb, str):
81 154 remote_tb = [detailed_tb]
82 155
83 156 tb += (
84 '\n+++ BEG SOURCE EXCEPTION +++\n\n'
85 '{}\n'
86 '+++ END SOURCE EXCEPTION +++\n'
87 ''.format('\n'.join(remote_tb))
157 "\n+++ BEG SOURCE EXCEPTION +++\n\n"
158 "{}\n"
159 "+++ END SOURCE EXCEPTION +++\n"
160 "".format("\n".join(remote_tb))
88 161 )
89 162
90 163 # Avoid that remote_tb also appears in the frame
91 164 del remote_tb
92 165
166 if use_detailed_tb:
167 locals_tb = get_detailed_tb(exc_info)
168 if locals_tb:
169 tb += f"\n+++ DETAILS +++\n\n{locals_tb}\n" ""
170 return tb
171
172
173 def _store_exception(exc_id, exc_info, prefix, request_path=''):
174 """
175 Low level function to store exception in the exception tracker
176 """
177
178 extra_data = {}
179 extra_data.update(get_request_metadata())
180
181 exc_type, exc_value, exc_traceback = exc_info
182 tb = format_exc(exc_info)
183
93 184 exc_type_name = exc_type.__name__
185 exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name, extra_data=extra_data)
186
187 exc_pref_id = f"{exc_id}_{prefix}_{org_data['exc_timestamp']}"
94 188 exc_store_path = get_exc_store()
95 exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name)
96 exc_pref_id = '{}_{}_{}'.format(exc_id, prefix, org_data['exc_timestamp'])
97 189 if not os.path.isdir(exc_store_path):
98 190 os.makedirs(exc_store_path)
99 191 stored_exc_path = os.path.join(exc_store_path, exc_pref_id)
100 with open(stored_exc_path, 'wb') as f:
192 with open(stored_exc_path, "wb") as f:
101 193 f.write(exc_data)
102 log.debug('Stored generated exception %s as: %s', exc_id, stored_exc_path)
194 log.debug("Stored generated exception %s as: %s", exc_id, stored_exc_path)
103 195
104 log.error(
105 'error occurred handling this request.\n'
106 'Path: `%s`, tb: %s',
107 request_path, tb)
196 if request_path:
197 log.error(
198 'error occurred handling this request.\n'
199 'Path: `%s`, %s',
200 request_path, tb)
108 201
109 202
110 203 def store_exception(exc_id, exc_info, prefix=global_prefix, request_path=''):
111 204 """
112 205 Example usage::
113 206
114 207 exc_info = sys.exc_info()
115 208 store_exception(id(exc_info), exc_info)
116 209 """
117 210
118 211 try:
119 _store_exception(exc_id=exc_id, exc_info=exc_info, prefix=prefix,
120 request_path=request_path)
212 exc_type = exc_info[0]
213 exc_type_name = exc_type.__name__
214
215 _store_exception(
216 exc_id=exc_id, exc_info=exc_info, prefix=prefix, request_path=request_path,
217 )
218 return exc_id, exc_type_name
121 219 except Exception:
122 log.exception('Failed to store exception `%s` information', exc_id)
220 log.exception("Failed to store exception `%s` information", exc_id)
123 221 # there's no way this can fail, it will crash server badly if it does.
124 222 pass
125 223
126 224
127 225 def _find_exc_file(exc_id, prefix=global_prefix):
128 226 exc_store_path = get_exc_store()
129 227 if prefix:
130 exc_id = '{}_{}'.format(exc_id, prefix)
228 exc_id = f"{exc_id}_{prefix}"
131 229 else:
132 230 # search without a prefix
133 exc_id = '{}'.format(exc_id)
231 exc_id = f"{exc_id}"
134 232
135 # we need to search the store for such start pattern as above
136 for fname in os.listdir(exc_store_path):
137 if fname.startswith(exc_id):
138 exc_id = os.path.join(exc_store_path, fname)
139 break
140 continue
141 else:
142 exc_id = None
233 found_exc_id = None
234 matches = glob.glob(os.path.join(exc_store_path, exc_id) + "*")
235 if matches:
236 found_exc_id = matches[0]
143 237
144 return exc_id
238 return found_exc_id
145 239
146 240
147 241 def _read_exception(exc_id, prefix):
148 242 exc_id_file_path = _find_exc_file(exc_id=exc_id, prefix=prefix)
149 243 if exc_id_file_path:
150 with open(exc_id_file_path, 'rb') as f:
244 with open(exc_id_file_path, "rb") as f:
151 245 return exc_unserialize(f.read())
152 246 else:
153 log.debug('Exception File `%s` not found', exc_id_file_path)
247 log.debug("Exception File `%s` not found", exc_id_file_path)
154 248 return None
155 249
156 250
157 251 def read_exception(exc_id, prefix=global_prefix):
158 252 try:
159 253 return _read_exception(exc_id=exc_id, prefix=prefix)
160 254 except Exception:
161 log.exception('Failed to read exception `%s` information', exc_id)
255 log.exception("Failed to read exception `%s` information", exc_id)
162 256 # there's no way this can fail, it will crash server badly if it does.
163 257 return None
164 258
165 259
166 260 def delete_exception(exc_id, prefix=global_prefix):
167 261 try:
168 262 exc_id_file_path = _find_exc_file(exc_id, prefix=prefix)
169 263 if exc_id_file_path:
170 264 os.remove(exc_id_file_path)
171 265
172 266 except Exception:
173 log.exception('Failed to remove exception `%s` information', exc_id)
267 log.exception("Failed to remove exception `%s` information", exc_id)
174 268 # this must never fail; if it did, it would crash the server badly.
175 269 pass
270
271
272 def generate_id():
273 return id(object())
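
End to end, the tracker is used the way the store_exception docstring suggests; a sketch, with the import path assumed and str keys assumed from the msgpack round-trip:

    import sys

    from vcsserver.lib import exc_tracking  # module path assumed

    try:
        1 / 0
    except Exception:
        exc_id = exc_tracking.generate_id()
        exc_tracking.store_exception(exc_id, sys.exc_info())

    stored = exc_tracking.read_exception(exc_id)  # msgpack-decoded dict, or None
    if stored:
        print(stored['exc_type'], stored['exc_utc_date'])
    exc_tracking.delete_exception(exc_id)
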
@@ -1,65 +1,63 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
8 6 # the Free Software Foundation; either version 3 of the License, or
9 7 # (at your option) any later version.
10 8 #
11 9 # This program is distributed in the hope that it will be useful,
12 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 12 # GNU General Public License for more details.
15 13 #
16 14 # You should have received a copy of the GNU General Public License
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20 18
21 19 import logging
22 20
23 21 from repoze.lru import LRUCache
24 22
25 from vcsserver.utils import safe_str
23 from vcsserver.str_utils import safe_str
26 24
27 25 log = logging.getLogger(__name__)
28 26
29 27
30 28 class LRUDict(LRUCache):
31 29 """
32 30 Wrapper to provide partial dict access
33 31 """
34 32
35 33 def __setitem__(self, key, value):
36 34 return self.put(key, value)
37 35
38 36 def __getitem__(self, key):
39 37 return self.get(key)
40 38
41 39 def __contains__(self, key):
42 40 return bool(self.get(key))
43 41
44 42 def __delitem__(self, key):
45 43 del self.data[key]
46 44
47 45 def keys(self):
48 return self.data.keys()
46 return list(self.data.keys())
49 47
50 48
51 49 class LRUDictDebug(LRUDict):
52 50 """
53 51 Wrapper to provide some debug options
54 52 """
55 53 def _report_keys(self):
56 elems_cnt = '%s/%s' % (len(self.keys()), self.size)
54 elems_cnt = f'{len(list(self.keys()))}/{self.size}'
57 55 # trick for pformat print it more nicely
58 56 fmt = '\n'
59 57 for cnt, elem in enumerate(self.keys()):
60 fmt += '%s - %s\n' % (cnt+1, safe_str(elem))
58 fmt += f'{cnt+1} - {safe_str(elem)}\n'
61 59 log.debug('current LRU keys (%s):%s', elems_cnt, fmt)
62 60
63 61 def __getitem__(self, key):
64 62 self._report_keys()
65 63 return self.get(key)
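
A quick illustration of the wrapper semantics, including the `__contains__` quirk visible above (membership is `bool(get(key))`, so falsy values read as absent):

    cache = LRUDict(2)  # capacity: two entries
    cache['a'] = 1
    cache['b'] = 2
    cache['c'] = 3  # evicts the least recently used key, 'a'
    assert 'a' not in cache
    assert cache['b'] == 2
    print(cache.keys())  # the two surviving keys
    cache['zero'] = 0
    assert 'zero' not in cache  # counter-intuitive, but matches the code above
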
@@ -1,79 +1,114 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import logging
19 import threading
20
19 21 from dogpile.cache import register_backend
20 22
23 from . import region_meta
24 from .utils import (
25 backend_key_generator,
26 clear_cache_namespace,
27 get_default_cache_settings,
28 get_or_create_region,
29 make_region,
30 str2bool,
31 )
32
33 module_name = 'vcsserver'
34
21 35 register_backend(
22 "dogpile.cache.rc.memory_lru", "vcsserver.lib.rc_cache.backends",
36 "dogpile.cache.rc.memory_lru", f"{module_name}.lib.rc_cache.backends",
23 37 "LRUMemoryBackend")
24 38
25 39 register_backend(
26 "dogpile.cache.rc.file_namespace", "vcsserver.lib.rc_cache.backends",
40 "dogpile.cache.rc.file_namespace", f"{module_name}.lib.rc_cache.backends",
27 41 "FileNamespaceBackend")
28 42
29 43 register_backend(
30 "dogpile.cache.rc.redis", "vcsserver.lib.rc_cache.backends",
44 "dogpile.cache.rc.redis", f"{module_name}.lib.rc_cache.backends",
31 45 "RedisPickleBackend")
32 46
33 47 register_backend(
34 "dogpile.cache.rc.redis_msgpack", "vcsserver.lib.rc_cache.backends",
48 "dogpile.cache.rc.redis_msgpack", f"{module_name}.lib.rc_cache.backends",
35 49 "RedisMsgPackBackend")
36 50
37 51
38 52 log = logging.getLogger(__name__)
39 53
40 from . import region_meta
41 from .utils import (
42 get_default_cache_settings, backend_key_generator, get_or_create_region,
43 clear_cache_namespace, make_region)
54
55 CACHE_OBJ_CACHE_VER = 'v2'
56
57 CLEAR_DELETE = 'delete'
58 CLEAR_INVALIDATE = 'invalidate'
59
60
61 def async_creation_runner(cache, cache_key, creator, mutex):
62
63 def runner():
64 try:
65 value = creator()
66 cache.set(cache_key, value)
67 finally:
68 mutex.release()
69
70 thread = threading.Thread(target=runner)
71 thread.start()
44 72
45 73
46 74 def configure_dogpile_cache(settings):
47 75 cache_dir = settings.get('cache_dir')
48 76 if cache_dir:
49 77 region_meta.dogpile_config_defaults['cache_dir'] = cache_dir
50 78
51 79 rc_cache_data = get_default_cache_settings(settings, prefixes=['rc_cache.'])
52 80
53 81 # inspect available namespaces
54 82 avail_regions = set()
55 83 for key in rc_cache_data.keys():
56 84 namespace_name = key.split('.', 1)[0]
57 85 if namespace_name in avail_regions:
58 86 continue
59 87
60 88 avail_regions.add(namespace_name)
61 89 log.debug('dogpile: found the following cache region: %s', namespace_name)
62 90
63 91 new_region = make_region(
64 92 name=namespace_name,
65 function_key_generator=None
93 function_key_generator=None,
94 async_creation_runner=None
66 95 )
67 96
68 new_region.configure_from_config(settings, 'rc_cache.{}.'.format(namespace_name))
97 new_region.configure_from_config(settings, f'rc_cache.{namespace_name}.')
69 98 new_region.function_key_generator = backend_key_generator(new_region.actual_backend)
99
100 async_creator = str2bool(settings.pop(f'rc_cache.{namespace_name}.async_creator', 'false'))
101 if async_creator:
102 log.debug('configuring region %s with async creator', new_region)
103 new_region.async_creation_runner = async_creation_runner
104
70 105 if log.isEnabledFor(logging.DEBUG):
71 region_args = dict(backend=new_region.actual_backend.__class__,
106 region_args = dict(backend=new_region.actual_backend,
72 107 region_invalidator=new_region.region_invalidator.__class__)
73 log.debug('dogpile: registering a new region `%s` %s', namespace_name, region_args)
108 log.debug('dogpile: registering a new region key=`%s` args=%s', namespace_name, region_args)
74 109
75 110 region_meta.dogpile_cache_regions[namespace_name] = new_region
76 111
77 112
78 113 def includeme(config):
79 114 configure_dogpile_cache(config.registry.settings)
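
Put together, the wiring above consumes flat rc_cache.* settings. A hedged example mirroring the repo_object defaults registered in http_main earlier in this diff; the paths are illustrative:

    settings = {
        'cache_dir': '/tmp/rc_cache',
        'rc_cache.repo_object.backend': 'dogpile.cache.rc.file_namespace',
        'rc_cache.repo_object.expiration_time': '2592000',  # 30 days, in seconds
        'rc_cache.repo_object.arguments.filename': '/tmp/rc_cache/repo_object.db',
        'rc_cache.repo_object.async_creator': 'true',  # opt in to the thread runner
    }
    configure_dogpile_cache(settings)
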
@@ -1,329 +1,303 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 import time
19 import errno
18 #import errno
19 import fcntl
20 import functools
20 21 import logging
22 import os
23 import pickle
24 #import time
21 25
26 #import gevent
22 27 import msgpack
23 28 import redis
24 29
25 from dogpile.cache.api import CachedValue
26 from dogpile.cache.backends import memory as memory_backend
30 flock_org = fcntl.flock
31 from typing import Union
32
33 from dogpile.cache.api import Deserializer, Serializer
27 34 from dogpile.cache.backends import file as file_backend
35 from dogpile.cache.backends import memory as memory_backend
28 36 from dogpile.cache.backends import redis as redis_backend
29 from dogpile.cache.backends.file import NO_VALUE, compat, FileLock
37 from dogpile.cache.backends.file import FileLock
30 38 from dogpile.cache.util import memoized_property
31 39
32 from pyramid.settings import asbool
33
34 40 from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug
35 from vcsserver.utils import safe_str
36
41 from vcsserver.str_utils import safe_bytes, safe_str
42 from vcsserver.type_utils import str2bool
37 43
38 44 _default_max_size = 1024
39 45
40 46 log = logging.getLogger(__name__)
41 47
42 48
43 49 class LRUMemoryBackend(memory_backend.MemoryBackend):
44 50 key_prefix = 'lru_mem_backend'
45 51 pickle_values = False
46 52
47 53 def __init__(self, arguments):
48 max_size = arguments.pop('max_size', _default_max_size)
54 self.max_size = arguments.pop('max_size', _default_max_size)
49 55
50 56 LRUDictClass = LRUDict
51 57 if arguments.pop('log_key_count', None):
52 58 LRUDictClass = LRUDictDebug
53 59
54 arguments['cache_dict'] = LRUDictClass(max_size)
55 super(LRUMemoryBackend, self).__init__(arguments)
60 arguments['cache_dict'] = LRUDictClass(self.max_size)
61 super().__init__(arguments)
62
63 def __repr__(self):
64 return f'{self.__class__}(maxsize=`{self.max_size}`)'
65
66 def __str__(self):
67 return self.__repr__()
56 68
57 69 def delete(self, key):
58 70 try:
59 71 del self._cache[key]
60 72 except KeyError:
61 73 # we don't care if key isn't there at deletion
62 74 pass
63 75
76 def list_keys(self, prefix):
77 return list(self._cache.keys())
78
64 79 def delete_multi(self, keys):
65 80 for key in keys:
66 81 self.delete(key)
67 82
68
69 class PickleSerializer(object):
70
71 def _dumps(self, value, safe=False):
72 try:
73 return compat.pickle.dumps(value)
74 except Exception:
75 if safe:
76 return NO_VALUE
77 else:
78 raise
79
80 def _loads(self, value, safe=True):
81 try:
82 return compat.pickle.loads(value)
83 except Exception:
84 if safe:
85 return NO_VALUE
86 else:
87 raise
83 def delete_multi_by_prefix(self, prefix):
84 cache_keys = self.list_keys(prefix=prefix)
85 num_affected_keys = len(cache_keys)
86 if num_affected_keys:
87 self.delete_multi(cache_keys)
88 return num_affected_keys
88 89
89 90
90 class MsgPackSerializer(object):
91
92 def _dumps(self, value, safe=False):
93 try:
94 return msgpack.packb(value)
95 except Exception:
96 if safe:
97 return NO_VALUE
98 else:
99 raise
100
101 def _loads(self, value, safe=True):
102 """
103 pickle maintained the `CachedValue` wrapper of the tuple
104 msgpack does not, so it must be added back in.
105 """
106 try:
107 value = msgpack.unpackb(value, use_list=False)
108 return CachedValue(*value)
109 except Exception:
110 if safe:
111 return NO_VALUE
112 else:
113 raise
91 class PickleSerializer:
92 serializer: None | Serializer = staticmethod( # type: ignore
93 functools.partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL)
94 )
95 deserializer: None | Deserializer = staticmethod( # type: ignore
96 functools.partial(pickle.loads)
97 )
114 98
115 99
116 import fcntl
117 flock_org = fcntl.flock
100 class MsgPackSerializer:
101 serializer: None | Serializer = staticmethod( # type: ignore
102 msgpack.packb
103 )
104 deserializer: None | Deserializer = staticmethod( # type: ignore
105 functools.partial(msgpack.unpackb, use_list=False)
106 )
118 107
119 108
120 109 class CustomLockFactory(FileLock):
121 110
122 111 pass
123 112
124 113
125 114 class FileNamespaceBackend(PickleSerializer, file_backend.DBMBackend):
126 115 key_prefix = 'file_backend'
127 116
128 117 def __init__(self, arguments):
129 118 arguments['lock_factory'] = CustomLockFactory
130 119 db_file = arguments.get('filename')
131 120
132 log.debug('initialing %s DB in %s', self.__class__.__name__, db_file)
121 log.debug('initialing cache-backend=%s db in %s', self.__class__.__name__, db_file)
122 db_file_dir = os.path.dirname(db_file)
123 if not os.path.isdir(db_file_dir):
124 os.makedirs(db_file_dir)
125
133 126 try:
134 super(FileNamespaceBackend, self).__init__(arguments)
127 super().__init__(arguments)
135 128 except Exception:
136 log.error('Failed to initialize db at: %s', db_file)
129 log.exception('Failed to initialize db at: %s', db_file)
137 130 raise
138 131
139 132 def __repr__(self):
140 return '{} `{}`'.format(self.__class__, self.filename)
133 return f'{self.__class__}(file=`{self.filename}`)'
134
135 def __str__(self):
136 return self.__repr__()
141 137
142 def list_keys(self, prefix=''):
143 prefix = '{}:{}'.format(self.key_prefix, prefix)
138 def _get_keys_pattern(self, prefix: bytes = b''):
139 return b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
144 140
145 def cond(v):
141 def list_keys(self, prefix: bytes = b''):
142 prefix = self._get_keys_pattern(prefix)
143
144 def cond(dbm_key: bytes):
146 145 if not prefix:
147 146 return True
148 147
149 if v.startswith(prefix):
148 if dbm_key.startswith(prefix):
150 149 return True
151 150 return False
152 151
153 152 with self._dbm_file(True) as dbm:
154 153 try:
155 return filter(cond, dbm.keys())
154 return list(filter(cond, dbm.keys()))
156 155 except Exception:
157 156 log.error('Failed to fetch DBM keys from DB: %s', self.get_store())
158 157 raise
159 158
159 def delete_multi_by_prefix(self, prefix):
160 cache_keys = self.list_keys(prefix=prefix)
161 num_affected_keys = len(cache_keys)
162 if num_affected_keys:
163 self.delete_multi(cache_keys)
164 return num_affected_keys
165
160 166 def get_store(self):
161 167 return self.filename
162 168
163 def _dbm_get(self, key):
164 with self._dbm_file(False) as dbm:
165 if hasattr(dbm, 'get'):
166 value = dbm.get(key, NO_VALUE)
167 else:
168 # gdbm objects lack a .get method
169 try:
170 value = dbm[key]
171 except KeyError:
172 value = NO_VALUE
173 if value is not NO_VALUE:
174 value = self._loads(value)
175 return value
176
177 def get(self, key):
178 try:
179 return self._dbm_get(key)
180 except Exception:
181 log.error('Failed to fetch DBM key %s from DB: %s', key, self.get_store())
182 raise
183
184 def set(self, key, value):
185 with self._dbm_file(True) as dbm:
186 dbm[key] = self._dumps(value)
187
188 def set_multi(self, mapping):
189 with self._dbm_file(True) as dbm:
190 for key, value in mapping.items():
191 dbm[key] = self._dumps(value)
192
193 169
194 170 class BaseRedisBackend(redis_backend.RedisBackend):
195 171 key_prefix = ''
196 172
197 173 def __init__(self, arguments):
198 super(BaseRedisBackend, self).__init__(arguments)
174 self.db_conn = arguments.get('host', '') or arguments.get('url', '') or 'redis-host'
175 super().__init__(arguments)
176
199 177 self._lock_timeout = self.lock_timeout
200 self._lock_auto_renewal = asbool(arguments.pop("lock_auto_renewal", True))
178 self._lock_auto_renewal = str2bool(arguments.pop("lock_auto_renewal", True))
201 179
202 180 if self._lock_auto_renewal and not self._lock_timeout:
203 181 # set default timeout for auto_renewal
204 182 self._lock_timeout = 30
205 183
184 def __repr__(self):
185 return f'{self.__class__}(conn=`{self.db_conn}`)'
186
187 def __str__(self):
188 return self.__repr__()
189
206 190 def _create_client(self):
207 191 args = {}
208 192
209 193 if self.url is not None:
210 194 args.update(url=self.url)
211 195
212 196 else:
213 197 args.update(
214 198 host=self.host, password=self.password,
215 199 port=self.port, db=self.db
216 200 )
217 201
218 202 connection_pool = redis.ConnectionPool(**args)
203 self.writer_client = redis.StrictRedis(
204 connection_pool=connection_pool
205 )
206 self.reader_client = self.writer_client
219 207
220 return redis.StrictRedis(connection_pool=connection_pool)
208 def _get_keys_pattern(self, prefix: bytes = b''):
209 return b'%b:%b*' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
210
211 def list_keys(self, prefix: bytes = b''):
212 prefix = self._get_keys_pattern(prefix)
213 return self.reader_client.keys(prefix)
221 214
222 def list_keys(self, prefix=''):
223 prefix = '{}:{}*'.format(self.key_prefix, prefix)
224 return self.client.keys(prefix)
215 def delete_multi_by_prefix(self, prefix, use_lua=False):
216 if use_lua:
217 # highly efficient Lua script to delete ALL keys matching a prefix...
218 lua = """local keys = redis.call('keys', ARGV[1])
219 for i=1,#keys,5000 do
220 redis.call('del', unpack(keys, i, math.min(i+(5000-1), #keys)))
221 end
222 return #keys"""
223 num_affected_keys = self.writer_client.eval(
224 lua,
225 0,
226 f"{prefix}*")
227 else:
228 cache_keys = self.list_keys(prefix=prefix)
229 num_affected_keys = len(cache_keys)
230 if num_affected_keys:
231 self.delete_multi(cache_keys)
232 return num_affected_keys
225 233
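For reference, the Lua branch above behaves like this standalone sketch (a minimal illustration against a plain redis-py client; the connection URL and prefix are placeholders, not values from this codebase):

import redis

client = redis.StrictRedis.from_url('redis://localhost:6379/0')  # assumed instance

DELETE_BY_PREFIX_LUA = """local keys = redis.call('keys', ARGV[1])
for i=1,#keys,5000 do
    redis.call('del', unpack(keys, i, math.min(i+(5000-1), #keys)))
end
return #keys"""

def delete_by_prefix(prefix: str) -> int:
    # numkeys=0: the glob pattern travels as ARGV[1], matching the backend above
    return client.eval(DELETE_BY_PREFIX_LUA, 0, f'{prefix}*')

The script runs server-side in a single round-trip and deletes in batches of 5000 to respect Lua's unpack() limits; the trade-off is that KEYS scans the whole keyspace and blocks the server, which is why the non-Lua path (list_keys + delete_multi) stays the default.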
226 234 def get_store(self):
227 return self.client.connection_pool
228
229 def get(self, key):
230 value = self.client.get(key)
231 if value is None:
232 return NO_VALUE
233 return self._loads(value)
234
235 def get_multi(self, keys):
236 if not keys:
237 return []
238 values = self.client.mget(keys)
239 loads = self._loads
240 return [
241 loads(v) if v is not None else NO_VALUE
242 for v in values]
243
244 def set(self, key, value):
245 if self.redis_expiration_time:
246 self.client.setex(key, self.redis_expiration_time,
247 self._dumps(value))
248 else:
249 self.client.set(key, self._dumps(value))
250
251 def set_multi(self, mapping):
252 dumps = self._dumps
253 mapping = dict(
254 (k, dumps(v))
255 for k, v in mapping.items()
256 )
257
258 if not self.redis_expiration_time:
259 self.client.mset(mapping)
260 else:
261 pipe = self.client.pipeline()
262 for key, value in mapping.items():
263 pipe.setex(key, self.redis_expiration_time, value)
264 pipe.execute()
235 return self.reader_client.connection_pool
265 236
266 237 def get_mutex(self, key):
267 238 if self.distributed_lock:
268 lock_key = redis_backend.u('_lock_{0}').format(safe_str(key))
269 return get_mutex_lock(self.client, lock_key, self._lock_timeout,
270 auto_renewal=self._lock_auto_renewal)
239 lock_key = f'_lock_{safe_str(key)}'
240 return get_mutex_lock(
241 self.writer_client, lock_key,
242 self._lock_timeout,
243 auto_renewal=self._lock_auto_renewal
244 )
271 245 else:
272 246 return None
273 247
274 248
275 249 class RedisPickleBackend(PickleSerializer, BaseRedisBackend):
276 250 key_prefix = 'redis_pickle_backend'
277 251 pass
278 252
279 253
280 254 class RedisMsgPackBackend(MsgPackSerializer, BaseRedisBackend):
281 255 key_prefix = 'redis_msgpack_backend'
282 256 pass
283 257
284 258
285 259 def get_mutex_lock(client, lock_key, lock_timeout, auto_renewal=False):
286 import redis_lock
260 from vcsserver.lib._vendor import redis_lock
287 261
288 class _RedisLockWrapper(object):
262 class _RedisLockWrapper:
289 263 """LockWrapper for redis_lock"""
290 264
291 265 @classmethod
292 266 def get_lock(cls):
293 267 return redis_lock.Lock(
294 268 redis_client=client,
295 269 name=lock_key,
296 270 expire=lock_timeout,
297 271 auto_renewal=auto_renewal,
298 272 strict=True,
299 273 )
300 274
301 275 def __repr__(self):
302 return "{}:{}".format(self.__class__.__name__, lock_key)
276 return f"{self.__class__.__name__}:{lock_key}"
303 277
304 278 def __str__(self):
305 return "{}:{}".format(self.__class__.__name__, lock_key)
279 return f"{self.__class__.__name__}:{lock_key}"
306 280
307 281 def __init__(self):
308 282 self.lock = self.get_lock()
309 283 self.lock_key = lock_key
310 284
311 285 def acquire(self, wait=True):
312 286 log.debug('Trying to acquire Redis lock for key %s', self.lock_key)
313 287 try:
314 288 acquired = self.lock.acquire(wait)
315 289 log.debug('Got lock for key %s, %s', self.lock_key, acquired)
316 290 return acquired
317 291 except redis_lock.AlreadyAcquired:
318 292 return False
319 293 except redis_lock.AlreadyStarted:
320 294 # refresh thread exists, but it also means we acquired the lock
321 295 return True
322 296
323 297 def release(self):
324 298 try:
325 299 self.lock.release()
326 300 except redis_lock.NotAcquired:
327 301 pass
328 302
329 303 return _RedisLockWrapper()
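A short usage sketch for the wrapper returned above (it assumes a reachable Redis instance; the key name is a placeholder):

import redis

client = redis.StrictRedis(host='localhost', port=6379, db=0)  # assumed instance

mutex = get_mutex_lock(client, '_lock_my_cache_key', lock_timeout=30, auto_renewal=True)
if mutex.acquire(wait=True):
    try:
        ...  # compute and store the value guarded by the lock
    finally:
        mutex.release()  # NotAcquired is swallowed by the wrapper

With auto_renewal=True a background thread keeps extending the expiry while the holder works, so lock_timeout only has to cover crash recovery rather than the worst-case computation time.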
@@ -1,26 +1,26 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import tempfile
20 20
21 21 dogpile_config_defaults = {
22 22 'cache_dir': os.path.join(tempfile.gettempdir(), 'rc_cache')
23 23 }
24 24
25 25 # GLOBAL TO STORE ALL REGISTERED REGIONS
26 26 dogpile_cache_regions = {}
@@ -1,263 +1,245 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 import os
19 import time
18 import functools
20 19 import logging
21 import functools
20 import os
21 import threading
22 import time
22 23
24 import decorator
23 25 from dogpile.cache import CacheRegion
24 from dogpile.cache.util import compat
26
25 27
26 from vcsserver.utils import safe_str, sha1
28 from vcsserver.utils import sha1
29 from vcsserver.str_utils import safe_bytes
30 from vcsserver.type_utils import str2bool # noqa :required by imports from .utils
27 31
28 from vcsserver.lib.rc_cache import region_meta
32 from . import region_meta
29 33
30 34 log = logging.getLogger(__name__)
31 35
32 36
33 37 class RhodeCodeCacheRegion(CacheRegion):
34 38
39 def __repr__(self):
40 return f'`{self.__class__.__name__}(name={self.name}, backend={self.backend.__class__})`'
41
35 42 def conditional_cache_on_arguments(
36 43 self, namespace=None,
37 44 expiration_time=None,
38 45 should_cache_fn=None,
39 to_str=compat.string_type,
46 to_str=str,
40 47 function_key_generator=None,
41 48 condition=True):
42 49 """
43 50 Custom conditional decorator, that will not touch any dogpile internals if
44 condition isn't meet. This works a bit different than should_cache_fn
51 condition isn't met. This works a bit differently from should_cache_fn,
45 52 and it's faster in cases where we never want to compute cached values.
46 53 """
47 expiration_time_is_callable = compat.callable(expiration_time)
54 expiration_time_is_callable = callable(expiration_time)
55 if not namespace:
56 namespace = getattr(self, '_default_namespace', None)
48 57
49 58 if function_key_generator is None:
50 59 function_key_generator = self.function_key_generator
51 60
52 # workaround for py2 and cython problems, this block should be removed
53 # once we've migrated to py3
54 if 'cython' == 'cython':
55 def decorator(fn):
56 if to_str is compat.string_type:
57 # backwards compatible
58 key_generator = function_key_generator(namespace, fn)
59 else:
60 key_generator = function_key_generator(namespace, fn, to_str=to_str)
61
62 @functools.wraps(fn)
63 def decorate(*arg, **kw):
64 key = key_generator(*arg, **kw)
65
66 @functools.wraps(fn)
67 def creator():
68 return fn(*arg, **kw)
69
70 if not condition:
71 return creator()
72
73 timeout = expiration_time() if expiration_time_is_callable \
74 else expiration_time
75
76 return self.get_or_create(key, creator, timeout, should_cache_fn)
77
78 def invalidate(*arg, **kw):
79 key = key_generator(*arg, **kw)
80 self.delete(key)
81
82 def set_(value, *arg, **kw):
83 key = key_generator(*arg, **kw)
84 self.set(key, value)
85
86 def get(*arg, **kw):
87 key = key_generator(*arg, **kw)
88 return self.get(key)
89
90 def refresh(*arg, **kw):
91 key = key_generator(*arg, **kw)
92 value = fn(*arg, **kw)
93 self.set(key, value)
94 return value
95
96 decorate.set = set_
97 decorate.invalidate = invalidate
98 decorate.refresh = refresh
99 decorate.get = get
100 decorate.original = fn
101 decorate.key_generator = key_generator
102 decorate.__wrapped__ = fn
103
104 return decorate
105 return decorator
106
107 def get_or_create_for_user_func(key_generator, user_func, *arg, **kw):
61 def get_or_create_for_user_func(func_key_generator, user_func, *arg, **kw):
108 62
109 63 if not condition:
110 log.debug('Calling un-cached method:%s', user_func.func_name)
64 log.debug('Calling un-cached method:%s', user_func.__name__)
111 65 start = time.time()
112 66 result = user_func(*arg, **kw)
113 67 total = time.time() - start
114 log.debug('un-cached method:%s took %.4fs', user_func.func_name, total)
68 log.debug('un-cached method:%s took %.4fs', user_func.__name__, total)
115 69 return result
116 70
117 key = key_generator(*arg, **kw)
71 key = func_key_generator(*arg, **kw)
118 72
119 73 timeout = expiration_time() if expiration_time_is_callable \
120 74 else expiration_time
121 75
122 log.debug('Calling cached method:`%s`', user_func.func_name)
76 log.debug('Calling cached method:`%s`', user_func.__name__)
123 77 return self.get_or_create(key, user_func, timeout, should_cache_fn, (arg, kw))
124 78
125 79 def cache_decorator(user_func):
126 if to_str is compat.string_type:
80 if to_str is str:
127 81 # backwards compatible
128 82 key_generator = function_key_generator(namespace, user_func)
129 83 else:
130 84 key_generator = function_key_generator(namespace, user_func, to_str=to_str)
131 85
132 86 def refresh(*arg, **kw):
133 87 """
134 88 Like invalidate, but regenerates the value instead
135 89 """
136 90 key = key_generator(*arg, **kw)
137 91 value = user_func(*arg, **kw)
138 92 self.set(key, value)
139 93 return value
140 94
141 95 def invalidate(*arg, **kw):
142 96 key = key_generator(*arg, **kw)
143 97 self.delete(key)
144 98
145 99 def set_(value, *arg, **kw):
146 100 key = key_generator(*arg, **kw)
147 101 self.set(key, value)
148 102
149 103 def get(*arg, **kw):
150 104 key = key_generator(*arg, **kw)
151 105 return self.get(key)
152 106
153 107 user_func.set = set_
154 108 user_func.invalidate = invalidate
155 109 user_func.get = get
156 110 user_func.refresh = refresh
157 111 user_func.key_generator = key_generator
158 112 user_func.original = user_func
159 113
160 114 # Use `decorate` to preserve the signature of :param:`user_func`.
161 115 return decorator.decorate(user_func, functools.partial(
162 116 get_or_create_for_user_func, key_generator))
163 117
164 118 return cache_decorator
165 119
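As a usage sketch, a region built with make_region() below can decorate a function and skip dogpile entirely when condition is falsy (backend name and values here are illustrative; in the VCSServer callers the decorator is applied inside the method body with the per-request cache_on flag as condition):

region = make_region(name='demo').configure('dogpile.cache.memory', expiration_time=60)

@region.conditional_cache_on_arguments(namespace='demo_ns', condition=True)
def expensive(a, b):
    return a + b

expensive(1, 2)             # computed once, then served from the cache
expensive.invalidate(1, 2)  # drops the cached entry
expensive.refresh(1, 2)     # recomputes and re-stores the value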
166 120
167 121 def make_region(*arg, **kw):
168 122 return RhodeCodeCacheRegion(*arg, **kw)
169 123
170 124
171 125 def get_default_cache_settings(settings, prefixes=None):
172 126 prefixes = prefixes or []
173 127 cache_settings = {}
174 128 for key in settings.keys():
175 129 for prefix in prefixes:
176 130 if key.startswith(prefix):
177 131 name = key.split(prefix)[1].strip()
178 132 val = settings[key]
179 if isinstance(val, compat.string_types):
133 if isinstance(val, str):
180 134 val = val.strip()
181 135 cache_settings[name] = val
182 136 return cache_settings
183 137
184 138
185 139 def compute_key_from_params(*args):
186 140 """
187 141 Helper to compute key from given params to be used in cache manager
188 142 """
189 return sha1("_".join(map(safe_str, args)))
143 return sha1(safe_bytes("_".join(map(str, args))))
144
145
146 def custom_key_generator(backend, namespace, fn):
147 func_name = fn.__name__
148
149 def generate_key(*args):
150 backend_pref = getattr(backend, 'key_prefix', None) or 'backend_prefix'
151 namespace_pref = namespace or 'default_namespace'
152 arg_key = compute_key_from_params(*args)
153 final_key = f"{backend_pref}:{namespace_pref}:{func_name}_{arg_key}"
154
155 return final_key
156
157 return generate_key
190 158
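Concretely, the generator above produces keys of the form backend_prefix:namespace:funcname_sha1(args); a runnable illustration (all names are placeholders):

import hashlib

def demo_key(backend_prefix, namespace, func_name, *args):
    # mirrors custom_key_generator: sha1 over the "_"-joined str() of the args
    arg_key = hashlib.sha1("_".join(map(str, args)).encode()).hexdigest()
    return f"{backend_prefix}:{namespace}:{func_name}_{arg_key}"

# demo_key('redis_pickle_backend', 'repo_1', 'commit_branches', 'uid', 42)
# -> 'redis_pickle_backend:repo_1:commit_branches_<40 hex chars>'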
191 159
192 160 def backend_key_generator(backend):
193 161 """
194 162 Special wrapper that also sends over the backend to the key generator
195 163 """
196 164 def wrapper(namespace, fn):
197 return key_generator(backend, namespace, fn)
165 return custom_key_generator(backend, namespace, fn)
198 166 return wrapper
199 167
200 168
201 def key_generator(backend, namespace, fn):
202 fname = fn.__name__
169 def get_or_create_region(region_name, region_namespace: str = None, use_async_runner=False):
170 from .backends import FileNamespaceBackend
171 from . import async_creation_runner
203 172
204 def generate_key(*args):
205 backend_prefix = getattr(backend, 'key_prefix', None) or 'backend_prefix'
206 namespace_pref = namespace or 'default_namespace'
207 arg_key = compute_key_from_params(*args)
208 final_key = "{}:{}:{}_{}".format(backend_prefix, namespace_pref, fname, arg_key)
209
210 return final_key
211
212 return generate_key
213
214
215 def get_or_create_region(region_name, region_namespace=None):
216 from vcsserver.lib.rc_cache.backends import FileNamespaceBackend
217 173 region_obj = region_meta.dogpile_cache_regions.get(region_name)
218 174 if not region_obj:
219 raise EnvironmentError(
220 'Region `{}` not in configured: {}.'.format(
221 region_name, region_meta.dogpile_cache_regions.keys()))
175 reg_keys = list(region_meta.dogpile_cache_regions.keys())
176 raise OSError(f'Region `{region_name}` not found in configured regions: {reg_keys}.')
177
178 region_uid_name = f'{region_name}:{region_namespace}'
222 179
223 region_uid_name = '{}:{}'.format(region_name, region_namespace)
180 # Special case for ONLY the FileNamespaceBackend backend. We register one-file-per-region
224 181 if isinstance(region_obj.actual_backend, FileNamespaceBackend):
182 if not region_namespace:
183 raise ValueError(f'{FileNamespaceBackend} requires specifying the region_namespace param')
184
225 185 region_exist = region_meta.dogpile_cache_regions.get(region_namespace)
226 186 if region_exist:
227 187 log.debug('Using already configured region: %s', region_namespace)
228 188 return region_exist
229 cache_dir = region_meta.dogpile_config_defaults['cache_dir']
189
230 190 expiration_time = region_obj.expiration_time
231 191
232 if not os.path.isdir(cache_dir):
233 os.makedirs(cache_dir)
192 cache_dir = region_meta.dogpile_config_defaults['cache_dir']
193 namespace_cache_dir = cache_dir
194
195 # we default the namespace_cache_dir to our default cache dir.
196 # however, if this backend is configured with a filename= param, we prioritize that,
197 # so all caches within that particular region, even namespaced ones, end up in the same path
198 if region_obj.actual_backend.filename:
199 namespace_cache_dir = os.path.dirname(region_obj.actual_backend.filename)
200
201 if not os.path.isdir(namespace_cache_dir):
202 os.makedirs(namespace_cache_dir)
234 203 new_region = make_region(
235 204 name=region_uid_name,
236 205 function_key_generator=backend_key_generator(region_obj.actual_backend)
237 206 )
207
238 208 namespace_filename = os.path.join(
239 cache_dir, "{}.cache.dbm".format(region_namespace))
209 namespace_cache_dir, f"{region_name}_{region_namespace}.cache_db")
240 210 # special type that allows 1db per namespace
241 211 new_region.configure(
242 212 backend='dogpile.cache.rc.file_namespace',
243 213 expiration_time=expiration_time,
244 214 arguments={"filename": namespace_filename}
245 215 )
246 216
247 217 # create and save in region caches
248 218 log.debug('configuring new region: %s', region_uid_name)
249 219 region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region
250 220
221 region_obj._default_namespace = region_namespace
222 if use_async_runner:
223 region_obj.async_creation_runner = async_creation_runner
251 224 return region_obj
252 225
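A hedged usage sketch (region and namespace names are placeholders; it assumes a 'repo_object' region was configured at startup, the way VCSServer does):

region = get_or_create_region('repo_object', region_namespace='repo_id_1')

@region.conditional_cache_on_arguments(condition=True)
def cached_lookup(repo_id):
    ...  # expensive work, cached under the namespaced key

For a FileNamespaceBackend region this transparently creates and memoizes a dedicated region backed by <cache_dir>/repo_object_repo_id_1.cache_db, so each namespace gets its own DBM file.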
253 226
254 def clear_cache_namespace(cache_region, cache_namespace_uid, invalidate=False):
255 region = get_or_create_region(cache_region, cache_namespace_uid)
256 cache_keys = region.backend.list_keys(prefix=cache_namespace_uid)
257 num_delete_keys = len(cache_keys)
258 if invalidate:
259 region.invalidate(hard=False)
260 else:
261 if num_delete_keys:
262 region.delete_multi(cache_keys)
263 return num_delete_keys
227 def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, method: str) -> int:
228 from . import CLEAR_DELETE, CLEAR_INVALIDATE
229
230 if not isinstance(cache_region, RhodeCodeCacheRegion):
231 cache_region = get_or_create_region(cache_region, cache_namespace_uid)
232 log.debug('clearing cache region: %s [prefix:%s] with method=%s',
233 cache_region, cache_namespace_uid, method)
234
235 num_affected_keys = 0
236
237 if method == CLEAR_INVALIDATE:
238 # NOTE: The CacheRegion.invalidate() method's default mode of
239 # operation is to set a timestamp local to this CacheRegion in this Python process only.
240 # It does not impact other Python processes or regions as the timestamp is only stored locally in memory.
241 cache_region.invalidate(hard=True)
242
243 if method == CLEAR_DELETE:
244 num_affected_keys = cache_region.backend.delete_multi_by_prefix(prefix=cache_namespace_uid)
245 return num_affected_keys
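In caller terms the two methods behave quite differently (a sketch; the absolute import path is an assumption consistent with the relative `from . import` above):

from vcsserver.lib.rc_cache import CLEAR_DELETE, CLEAR_INVALIDATE  # assumed path

# physically removes every key under the namespace prefix; visible to all processes
removed = clear_cache_namespace('repo_object', 'repo_id_1', method=CLEAR_DELETE)

# only marks this process's region as stale; returns 0 and leaves the keys in place
clear_cache_namespace('repo_object', 'repo_id_1', method=CLEAR_INVALIDATE)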
@@ -1,27 +1,25 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
8 6 # the Free Software Foundation; either version 3 of the License, or
9 7 # (at your option) any later version.
10 8 #
11 9 # This program is distributed in the hope that it will be useful,
12 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 12 # GNU General Public License for more details.
15 13 #
16 14 # You should have received a copy of the GNU General Public License
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20 18
21 19 counter = 0
22 20
23 21
24 22 def get_request_counter(request):
25 23 global counter
26 24 counter += 1
27 25 return counter
@@ -1,386 +1,417 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """Handles the Git smart protocol."""
19 19
20 20 import os
21 21 import socket
22 22 import logging
23 23
24 import simplejson as json
25 24 import dulwich.protocol
25 from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
26 26 from webob import Request, Response, exc
27 27
28 from vcsserver.lib.rc_json import json
28 29 from vcsserver import hooks, subprocessio
30 from vcsserver.str_utils import ascii_bytes
29 31
30 32
31 33 log = logging.getLogger(__name__)
32 34
33 35
34 class FileWrapper(object):
36 class FileWrapper:
35 37 """File wrapper that ensures how much data is read from it."""
36 38
37 39 def __init__(self, fd, content_length):
38 40 self.fd = fd
39 41 self.content_length = content_length
40 42 self.remain = content_length
41 43
42 44 def read(self, size):
43 45 if size <= self.remain:
44 46 try:
45 47 data = self.fd.read(size)
46 48 except socket.error:
47 49 raise IOError(self)
48 50 self.remain -= size
49 51 elif self.remain:
50 52 data = self.fd.read(self.remain)
51 53 self.remain = 0
52 54 else:
53 55 data = None
54 56 return data
55 57
56 58 def __repr__(self):
57 return '<FileWrapper %s len: %s, read: %s>' % (
59 return '<FileWrapper {} len: {}, read: {}>'.format(
58 60 self.fd, self.content_length, self.content_length - self.remain
59 61 )
60 62
61 63
62 class GitRepository(object):
64 class GitRepository:
63 65 """WSGI app for handling Git smart protocol endpoints."""
64 66
65 git_folder_signature = frozenset(
66 ('config', 'head', 'info', 'objects', 'refs'))
67 git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
67 68 commands = frozenset(('git-upload-pack', 'git-receive-pack'))
68 valid_accepts = frozenset(('application/x-%s-result' %
69 c for c in commands))
69 valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)
70 70
71 71 # The last bytes are the SHA1 of the first 12 bytes.
72 72 EMPTY_PACK = (
73 'PACK\x00\x00\x00\x02\x00\x00\x00\x00' +
74 '\x02\x9d\x08\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
73 b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
74 b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
75 75 )
76 SIDE_BAND_CAPS = frozenset(('side-band', 'side-band-64k'))
76 FLUSH_PACKET = b"0000"
77 77
78 def __init__(self, repo_name, content_path, git_path, update_server_info,
79 extras):
78 SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))
79
80 def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
80 81 files = frozenset(f.lower() for f in os.listdir(content_path))
81 82 valid_dir_signature = self.git_folder_signature.issubset(files)
82 83
83 84 if not valid_dir_signature:
84 raise OSError('%s missing git signature' % content_path)
85 raise OSError(f'{content_path} missing git signature')
85 86
86 87 self.content_path = content_path
87 88 self.repo_name = repo_name
88 89 self.extras = extras
89 90 self.git_path = git_path
90 91 self.update_server_info = update_server_info
91 92
92 93 def _get_fixedpath(self, path):
93 94 """
94 95 Small fix for repo_path
95 96
96 97 :param path:
97 98 """
98 99 path = path.split(self.repo_name, 1)[-1]
99 100 if path.startswith('.git'):
100 101 # for bare repos we still get the .git prefix inside, we skip it
101 102 # here, and remove from the service command
102 103 path = path[4:]
103 104
104 105 return path.strip('/')
105 106
106 107 def inforefs(self, request, unused_environ):
107 108 """
108 109 WSGI Response producer for HTTP GET Git Smart
109 110 HTTP /info/refs request.
110 111 """
111 112
112 113 git_command = request.GET.get('service')
113 114 if git_command not in self.commands:
114 115 log.debug('command %s not allowed', git_command)
115 116 return exc.HTTPForbidden()
116 117
117 118 # please, resist the urge to add '\n' to git capture and increment
118 119 # line count by 1.
119 120 # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
120 121 # a part of protocol.
121 122 # The code in Git client not only does NOT need '\n', but actually
122 123 # blows up if you sprinkle "flush" (0000) as "0001\n".
123 124 # It reads binary, per number of bytes specified.
124 125 # if you do add '\n' as part of data, count it.
125 server_advert = '# service=%s\n' % git_command
126 packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower()
126 server_advert = f'# service={git_command}\n'
127 packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
127 128 try:
128 129 gitenv = dict(os.environ)
129 130 # forget all configs
130 131 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
131 132 command = [self.git_path, git_command[4:], '--stateless-rpc',
132 133 '--advertise-refs', self.content_path]
133 134 out = subprocessio.SubprocessIOChunker(
134 135 command,
135 136 env=gitenv,
136 starting_values=[packet_len + server_advert + '0000'],
137 starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
137 138 shell=False
138 139 )
139 except EnvironmentError:
140 except OSError:
140 141 log.exception('Error processing command')
141 142 raise exc.HTTPExpectationFailed()
142 143
143 144 resp = Response()
144 resp.content_type = 'application/x-%s-advertisement' % str(git_command)
145 resp.content_type = f'application/x-{git_command}-advertisement'
145 146 resp.charset = None
146 147 resp.app_iter = out
147 148
148 149 return resp
149 150
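The length prefix computed above is standard Git pkt-line framing; a self-contained illustration (not a RhodeCode API):

def pkt_line(payload: str) -> str:
    # the 4 hex length digits count themselves plus the payload
    return hex(len(payload) + 4)[2:].rjust(4, '0') + payload

# '# service=git-upload-pack\n' is 26 bytes, so the frame starts with '001e'
assert pkt_line('# service=git-upload-pack\n').startswith('001e')
# the advertisement is then terminated by the '0000' flush packet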
150 151 def _get_want_capabilities(self, request):
151 152 """Read the capabilities found in the first want line of the request."""
152 153 pos = request.body_file_seekable.tell()
153 154 first_line = request.body_file_seekable.readline()
154 155 request.body_file_seekable.seek(pos)
155 156
156 157 return frozenset(
157 158 dulwich.protocol.extract_want_line_capabilities(first_line)[1])
158 159
159 160 def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
160 161 """
161 162 Construct a response with an empty PACK file.
162 163
163 164 We use an empty PACK file, as that would trigger the failure of the pull
164 165 or clone command.
165 166
166 167 We also print in the error output a message explaining why the command
167 168 was aborted.
168 169
169 If aditionally, the user is accepting messages we send them the output
170 Additionally, if the user is accepting messages, we send them the output
170 171 of the pre-pull hook.
171 172
172 173 Note that for clients not supporting side-band we just send them the
173 174 empty PACK file.
174 175 """
176
175 177 if self.SIDE_BAND_CAPS.intersection(capabilities):
176 178 response = []
177 179 proto = dulwich.protocol.Protocol(None, response.append)
178 proto.write_pkt_line('NAK\n')
179 self._write_sideband_to_proto(pre_pull_messages, proto,
180 capabilities)
180 proto.write_pkt_line(dulwich.protocol.NAK_LINE)
181
182 self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
181 183 # N.B.(skreft): Do not change the sideband channel to 3, as that
182 184 # produces a fatal error in the client:
183 185 # fatal: error in sideband demultiplexer
184 proto.write_sideband(2, 'Pre pull hook failed: aborting\n')
185 proto.write_sideband(1, self.EMPTY_PACK)
186 proto.write_sideband(
187 dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
188 ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
189 proto.write_sideband(
190 dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
191 ascii_bytes(self.EMPTY_PACK, allow_bytes=True))
186 192
187 # writes 0000
193 # writes b"0000" as default
188 194 proto.write_pkt_line(None)
189 195
190 196 return response
191 197 else:
192 return [self.EMPTY_PACK]
198 return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]
199
200 def _build_post_pull_response(self, response, capabilities, start_message, end_message):
201 """
202 Given an iterator response, we inject the post-pull messages.
203
204 We only inject the messages if the client supports sideband, and the
205 response has the format:
206 0008NAK\n...0000
207
208 Note that we do not check the no-progress capability as by default, git
209 sends it, which effectively would block all messages.
210 """
211
212 if not self.SIDE_BAND_CAPS.intersection(capabilities):
213 return response
214
215 if not start_message and not end_message:
216 return response
217
218 try:
219 iter(response)
220 # response is iterable, we can continue
221 except TypeError:
222 raise TypeError(f'response must be an iterator: got {type(response)}')
223 if isinstance(response, (list, tuple)):
224 raise TypeError(f'response must be an iterator: got {type(response)}')
225
226 def injected_response():
193 227
194 def _write_sideband_to_proto(self, data, proto, capabilities):
228 do_loop = 1
229 header_injected = 0
230 next_item = None
231 has_item = False
232 item = b''
233
234 while do_loop:
235
236 try:
237 next_item = next(response)
238 except StopIteration:
239 do_loop = 0
240
241 if has_item:
242 # last item! alter it now
243 if do_loop == 0 and item.endswith(self.FLUSH_PACKET):
244 new_response = [item[:-4]]
245 new_response.extend(self._get_messages(end_message, capabilities))
246 new_response.append(self.FLUSH_PACKET)
247 item = b''.join(new_response)
248
249 yield item
250
251 has_item = True
252 item = next_item
253
254 # alter item if it's the initial chunk
255 if not header_injected and item.startswith(b'0008NAK\n'):
256 new_response = [b'0008NAK\n']
257 new_response.extend(self._get_messages(start_message, capabilities))
258 new_response.append(item[8:])
259 item = b''.join(new_response)
260 header_injected = 1
261
262 return injected_response()
263
264 def _write_sideband_to_proto(self, proto, data, capabilities):
195 265 """
196 Write the data to the proto's sideband number 2.
266 Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS
197 267
198 268 We do not use dulwich's write_sideband directly as it only supports
199 269 side-band-64k.
200 270 """
201 271 if not data:
202 272 return
203 273
204 274 # N.B.(skreft): The values below are explained in the pack protocol
205 275 # documentation, section Packfile Data.
206 276 # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
207 if 'side-band-64k' in capabilities:
277 if CAPABILITY_SIDE_BAND_64K in capabilities:
208 278 chunk_size = 65515
209 elif 'side-band' in capabilities:
279 elif CAPABILITY_SIDE_BAND in capabilities:
210 280 chunk_size = 995
211 281 else:
212 282 return
213 283
214 chunker = (
215 data[i:i + chunk_size] for i in xrange(0, len(data), chunk_size))
284 chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))
216 285
217 286 for chunk in chunker:
218 proto.write_sideband(2, chunk)
287 proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))
219 288
220 289 def _get_messages(self, data, capabilities):
221 290 """Return a list with packets for sending data in sideband number 2."""
222 291 response = []
223 292 proto = dulwich.protocol.Protocol(None, response.append)
224 293
225 self._write_sideband_to_proto(data, proto, capabilities)
294 self._write_sideband_to_proto(proto, data, capabilities)
226 295
227 296 return response
228 297
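For illustration, each sideband message is an ordinary pkt-line whose first payload byte is the channel number, which is why the chunk sizes above are 65515 (side-band-64k) and 995 (side-band): together with the channel byte and the 4-digit length they hit the 65520- and 1000-byte frame limits exactly. A minimal sketch:

def sideband_pkt(channel: int, data: bytes) -> bytes:
    payload = bytes([channel]) + data            # channel byte + chunk
    return b'%04x' % (len(payload) + 4) + payload

# channel 2 (progress): 65515 + 1 + 4 == 65520, the side-band-64k maximum
assert len(sideband_pkt(2, b'x' * 65515)) == 65520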
229 def _inject_messages_to_response(self, response, capabilities,
230 start_messages, end_messages):
231 """
232 Given a list response we inject the pre/post-pull messages.
233
234 We only inject the messages if the client supports sideband, and the
235 response has the format:
236 0008NAK\n...0000
237
238 Note that we do not check the no-progress capability as by default, git
239 sends it, which effectively would block all messages.
240 """
241 if not self.SIDE_BAND_CAPS.intersection(capabilities):
242 return response
243
244 if not start_messages and not end_messages:
245 return response
246
247 # make a list out of response if it's an iterator
248 # so we can investigate it for message injection.
249 if hasattr(response, '__iter__'):
250 response = list(response)
251
252 if (not response[0].startswith('0008NAK\n') or
253 not response[-1].endswith('0000')):
254 return response
255
256 new_response = ['0008NAK\n']
257 new_response.extend(self._get_messages(start_messages, capabilities))
258 if len(response) == 1:
259 new_response.append(response[0][8:-4])
260 else:
261 new_response.append(response[0][8:])
262 new_response.extend(response[1:-1])
263 new_response.append(response[-1][:-4])
264 new_response.extend(self._get_messages(end_messages, capabilities))
265 new_response.append('0000')
266
267 return new_response
268
269 298 def backend(self, request, environ):
270 299 """
271 300 WSGI Response producer for HTTP POST Git Smart HTTP requests.
272 301 Reads commands and data from HTTP POST's body.
273 302 returns an iterator obj with contents of git command's
274 303 response to stdout
275 304 """
276 305 # TODO(skreft): think how we could detect an HTTPLockedException, as
277 306 # we probably want to have the same mechanism used by mercurial and
278 307 # simplevcs.
279 308 # For that we would need to parse the output of the command looking for
280 309 # some signs of the HTTPLockedError, parse the data and reraise it in
281 310 # pygrack. However, that would interfere with the streaming.
282 311 #
283 312 # Now the output of a blocked push is:
284 313 # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
285 314 # POST git-receive-pack (1047 bytes)
286 315 # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
287 316 # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
288 317 # ! [remote rejected] master -> master (pre-receive hook declined)
289 318 # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'
290 319
291 320 git_command = self._get_fixedpath(request.path_info)
292 321 if git_command not in self.commands:
293 322 log.debug('command %s not allowed', git_command)
294 323 return exc.HTTPForbidden()
295 324
296 325 capabilities = None
297 326 if git_command == 'git-upload-pack':
298 327 capabilities = self._get_want_capabilities(request)
299 328
300 329 if 'CONTENT_LENGTH' in environ:
301 330 inputstream = FileWrapper(request.body_file_seekable,
302 331 request.content_length)
303 332 else:
304 333 inputstream = request.body_file_seekable
305 334
306 335 resp = Response()
307 resp.content_type = ('application/x-%s-result' %
308 git_command.encode('utf8'))
336 resp.content_type = f'application/x-{git_command}-result'
309 337 resp.charset = None
310 338
311 339 pre_pull_messages = ''
340 # Upload-pack == clone
312 341 if git_command == 'git-upload-pack':
313 status, pre_pull_messages = hooks.git_pre_pull(self.extras)
314 if status != 0:
342 hook_response = hooks.git_pre_pull(self.extras)
343 if hook_response.status != 0:
344 pre_pull_messages = hook_response.output
315 345 resp.app_iter = self._build_failed_pre_pull_response(
316 346 capabilities, pre_pull_messages)
317 347 return resp
318 348
319 349 gitenv = dict(os.environ)
320 350 # forget all configs
321 351 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
322 352 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
323 353 cmd = [self.git_path, git_command[4:], '--stateless-rpc',
324 354 self.content_path]
325 355 log.debug('handling cmd %s', cmd)
326 356
327 357 out = subprocessio.SubprocessIOChunker(
328 358 cmd,
329 inputstream=inputstream,
359 input_stream=inputstream,
330 360 env=gitenv,
331 361 cwd=self.content_path,
332 362 shell=False,
333 363 fail_on_stderr=False,
334 364 fail_on_return_code=False
335 365 )
336 366
337 367 if self.update_server_info and git_command == 'git-receive-pack':
338 368 # We need to fully consume the iterator here, as the
339 369 # update-server-info command needs to be run after the push.
340 370 out = list(out)
341 371
342 372 # Updating refs manually after each push.
343 373 # This is required as some clients are exposing Git repos internally
344 374 # with the dumb protocol.
345 375 cmd = [self.git_path, 'update-server-info']
346 376 log.debug('handling cmd %s', cmd)
347 377 output = subprocessio.SubprocessIOChunker(
348 378 cmd,
349 inputstream=inputstream,
379 input_stream=inputstream,
350 380 env=gitenv,
351 381 cwd=self.content_path,
352 382 shell=False,
353 383 fail_on_stderr=False,
354 384 fail_on_return_code=False
355 385 )
356 386 # Consume all the output so the subprocess finishes
357 387 for _ in output:
358 388 pass
359 389
390 # Upload-pack == clone
360 391 if git_command == 'git-upload-pack':
361 unused_status, post_pull_messages = hooks.git_post_pull(self.extras)
362 resp.app_iter = self._inject_messages_to_response(
363 out, capabilities, pre_pull_messages, post_pull_messages)
392 hook_response = hooks.git_post_pull(self.extras)
393 post_pull_messages = hook_response.output
394 resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
364 395 else:
365 396 resp.app_iter = out
366 397
367 398 return resp
368 399
369 400 def __call__(self, environ, start_response):
370 401 request = Request(environ)
371 402 _path = self._get_fixedpath(request.path_info)
372 403 if _path.startswith('info/refs'):
373 404 app = self.inforefs
374 405 else:
375 406 app = self.backend
376 407
377 408 try:
378 409 resp = app(request, environ)
379 410 except exc.HTTPException as error:
380 411 log.exception('HTTP Error')
381 412 resp = error
382 413 except Exception:
383 414 log.exception('Unknown error')
384 415 resp = exc.HTTPInternalServerError()
385 416
386 417 return resp(environ, start_response)
@@ -1,1281 +1,1518 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 import posixpath as vcspath
22 21 import re
23 22 import stat
24 23 import traceback
25 import urllib
26 import urllib2
24 import urllib.request
25 import urllib.parse
26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
30 30 import pygit2
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 from dulwich.client import HttpGitClient, LocalGitClient
34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 from dulwich.server import update_server_info
41 40
41 import rhodecode
42 42 from vcsserver import exceptions, settings, subprocessio
43 from vcsserver.utils import safe_str, safe_int, safe_unicode
44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str
44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
48 48 from vcsserver.vcs_base import RemoteBase
49 49
50 50 DIR_STAT = stat.S_IFDIR
51 51 FILE_MODE = stat.S_IFMT
52 52 GIT_LINK = objects.S_IFGITLINK
53 PEELED_REF_MARKER = '^{}'
54
53 PEELED_REF_MARKER = b'^{}'
54 HEAD_MARKER = b'HEAD'
55 55
56 56 log = logging.getLogger(__name__)
57 57
58 58
59 def str_to_dulwich(value):
60 """
61 Dulwich 0.10.1a requires `unicode` objects to be passed in.
62 """
63 return value.decode(settings.WIRE_ENCODING)
64
65
66 59 def reraise_safe_exceptions(func):
67 60 """Converts Dulwich exceptions to something neutral."""
68 61
69 62 @wraps(func)
70 63 def wrapper(*args, **kwargs):
71 64 try:
72 65 return func(*args, **kwargs)
73 66 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
74 67 exc = exceptions.LookupException(org_exc=e)
75 68 raise exc(safe_str(e))
76 69 except (HangupException, UnexpectedCommandError) as e:
77 70 exc = exceptions.VcsException(org_exc=e)
78 71 raise exc(safe_str(e))
79 except Exception as e:
80 # NOTE(marcink): becuase of how dulwich handles some exceptions
72 except Exception:
73 # NOTE(marcink): because of how dulwich handles some exceptions
81 74 # (KeyError on empty repos), we cannot track this and catch all
82 75 # exceptions, it's an exceptions from other handlers
83 76 #if not hasattr(e, '_vcs_kind'):
84 77 #log.exception("Unhandled exception in git remote call")
85 78 #raise_from_original(exceptions.UnhandledException)
86 79 raise
87 80 return wrapper
88 81
89 82
90 83 class Repo(DulwichRepo):
91 84 """
92 85 A wrapper for dulwich Repo class.
93 86
94 87 Since dulwich is sometimes keeping .idx file descriptors open, it leads to
95 88 "Too many open files" error. We need to close all opened file descriptors
96 89 once the repo object is destroyed.
97 90 """
98 91 def __del__(self):
99 92 if hasattr(self, 'object_store'):
100 93 self.close()
101 94
102 95
103 96 class Repository(LibGit2Repo):
104 97
105 98 def __enter__(self):
106 99 return self
107 100
108 101 def __exit__(self, exc_type, exc_val, exc_tb):
109 102 self.free()
110 103
111 104
112 105 class GitFactory(RepoFactory):
113 106 repo_type = 'git'
114 107
115 108 def _create_repo(self, wire, create, use_libgit2=False):
116 109 if use_libgit2:
117 return Repository(wire['path'])
110 repo = Repository(safe_bytes(wire['path']))
118 111 else:
119 repo_path = str_to_dulwich(wire['path'])
120 return Repo(repo_path)
112 # dulwich mode
113 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
114 repo = Repo(repo_path)
115
116 log.debug('repository created: got GIT object: %s', repo)
117 return repo
121 118
122 119 def repo(self, wire, create=False, use_libgit2=False):
123 120 """
124 121 Get a repository instance for the given path.
125 122 """
126 123 return self._create_repo(wire, create, use_libgit2)
127 124
128 125 def repo_libgit2(self, wire):
129 126 return self.repo(wire, use_libgit2=True)
130 127
131 128
129 def create_signature_from_string(author_str, **kwargs):
130 """
131 Creates a pygit2.Signature object from a string of the format 'Name <email>'.
132
133 :param author_str: String of the format 'Name <email>'
134 :return: pygit2.Signature object
135 """
136 match = re.match(r'^(.+) <(.+)>$', author_str)
137 if match is None:
138 raise ValueError(f"Invalid format: {author_str}")
139
140 name, email = match.groups()
141 return pygit2.Signature(name, email, **kwargs)
142
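Usage sketch (time and offset are the optional pygit2.Signature parameters; the values are placeholders):

sig = create_signature_from_string('Jane Doe <jane@example.com>')
# with an explicit commit time (unix epoch seconds) and UTC offset in minutes:
sig = create_signature_from_string('Jane Doe <jane@example.com>', time=1700000000, offset=0)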
143
144 def get_obfuscated_url(url_obj):
145 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
146 url_obj.query = obfuscate_qs(url_obj.query)
147 obfuscated_uri = str(url_obj)
148 return obfuscated_uri
149
150
132 151 class GitRemote(RemoteBase):
133 152
134 153 def __init__(self, factory):
135 154 self._factory = factory
136 155 self._bulk_methods = {
137 156 "date": self.date,
138 157 "author": self.author,
139 158 "branch": self.branch,
140 159 "message": self.message,
141 160 "parents": self.parents,
142 161 "_commit": self.revision,
143 162 }
163 self._bulk_file_methods = {
164 "size": self.get_node_size,
165 "data": self.get_node_data,
166 "flags": self.get_node_flags,
167 "is_binary": self.get_node_is_binary,
168 "md5": self.md5_hash
169 }
144 170
145 171 def _wire_to_config(self, wire):
146 172 if 'config' in wire:
147 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
148 174 return {}
149 175
150 176 def _remote_conf(self, config):
151 177 params = [
152 178 '-c', 'core.askpass=""',
153 179 ]
154 ssl_cert_dir = config.get('vcs_ssl_dir')
155 if ssl_cert_dir:
156 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
180 config_attrs = {
181 'vcs_ssl_dir': 'http.sslCAinfo={}',
182 'vcs_git_lfs_store_location': 'lfs.storage={}'
183 }
184 for key, param in config_attrs.items():
185 if value := config.get(key):
186 params.extend(['-c', param.format(value)])
157 187 return params
158 188
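Given a wire config carrying both keys, the helper above expands into plain git -c flags; a worked sketch (the paths are placeholders):

config = {
    'vcs_ssl_dir': '/etc/ssl/certs/ca-bundle.crt',
    'vcs_git_lfs_store_location': '/var/cache/lfs_store',
}
# _remote_conf(config) ->
#   ['-c', 'core.askpass=""',
#    '-c', 'http.sslCAinfo=/etc/ssl/certs/ca-bundle.crt',
#    '-c', 'lfs.storage=/var/cache/lfs_store']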
159 189 @reraise_safe_exceptions
160 190 def discover_git_version(self):
161 191 stdout, _ = self.run_git_command(
162 192 {}, ['--version'], _bare=True, _safe=True)
163 prefix = 'git version'
193 prefix = b'git version'
164 194 if stdout.startswith(prefix):
165 195 stdout = stdout[len(prefix):]
166 return stdout.strip()
196 return safe_str(stdout.strip())
167 197
168 198 @reraise_safe_exceptions
169 199 def is_empty(self, wire):
170 200 repo_init = self._factory.repo_libgit2(wire)
171 201 with repo_init as repo:
172
173 202 try:
174 203 has_head = repo.head.name
175 204 if has_head:
176 205 return False
177 206
178 207 # NOTE(marcink): check again using more expensive method
179 208 return repo.is_empty
180 209 except Exception:
181 210 pass
182 211
183 212 return True
184 213
185 214 @reraise_safe_exceptions
186 215 def assert_correct_path(self, wire):
187 216 cache_on, context_uid, repo_id = self._cache_on(wire)
188 217 region = self._region(wire)
218
189 219 @region.conditional_cache_on_arguments(condition=cache_on)
190 def _assert_correct_path(_context_uid, _repo_id):
191 try:
192 repo_init = self._factory.repo_libgit2(wire)
193 with repo_init as repo:
194 pass
195 except pygit2.GitError:
196 path = wire.get('path')
197 tb = traceback.format_exc()
198 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
220 def _assert_correct_path(_context_uid, _repo_id, fast_check):
221 if fast_check:
222 path = safe_str(wire['path'])
223 if pygit2.discover_repository(path):
224 return True
199 225 return False
226 else:
227 try:
228 repo_init = self._factory.repo_libgit2(wire)
229 with repo_init:
230 pass
231 except pygit2.GitError:
232 path = wire.get('path')
233 tb = traceback.format_exc()
234 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
235 return False
236 return True
200 237
201 return True
202 return _assert_correct_path(context_uid, repo_id)
238 return _assert_correct_path(context_uid, repo_id, True)
203 239
204 240 @reraise_safe_exceptions
205 241 def bare(self, wire):
206 242 repo_init = self._factory.repo_libgit2(wire)
207 243 with repo_init as repo:
208 244 return repo.is_bare
209 245
210 246 @reraise_safe_exceptions
247 def get_node_data(self, wire, commit_id, path):
248 repo_init = self._factory.repo_libgit2(wire)
249 with repo_init as repo:
250 commit = repo[commit_id]
251 blob_obj = commit.tree[path]
252
253 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
254 raise exceptions.LookupException()(
255 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
256
257 return BytesEnvelope(blob_obj.data)
258
259 @reraise_safe_exceptions
260 def get_node_size(self, wire, commit_id, path):
261 repo_init = self._factory.repo_libgit2(wire)
262 with repo_init as repo:
263 commit = repo[commit_id]
264 blob_obj = commit.tree[path]
265
266 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
267 raise exceptions.LookupException()(
268 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
269
270 return blob_obj.size
271
272 @reraise_safe_exceptions
273 def get_node_flags(self, wire, commit_id, path):
274 repo_init = self._factory.repo_libgit2(wire)
275 with repo_init as repo:
276 commit = repo[commit_id]
277 blob_obj = commit.tree[path]
278
279 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
280 raise exceptions.LookupException()(
281 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
282
283 return blob_obj.filemode
284
285 @reraise_safe_exceptions
286 def get_node_is_binary(self, wire, commit_id, path):
287 repo_init = self._factory.repo_libgit2(wire)
288 with repo_init as repo:
289 commit = repo[commit_id]
290 blob_obj = commit.tree[path]
291
292 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
293 raise exceptions.LookupException()(
294 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
295
296 return blob_obj.is_binary
297
298 @reraise_safe_exceptions
211 299 def blob_as_pretty_string(self, wire, sha):
212 300 repo_init = self._factory.repo_libgit2(wire)
213 301 with repo_init as repo:
214 302 blob_obj = repo[sha]
215 blob = blob_obj.data
216 return blob
303 return BytesEnvelope(blob_obj.data)
217 304
218 305 @reraise_safe_exceptions
219 306 def blob_raw_length(self, wire, sha):
220 307 cache_on, context_uid, repo_id = self._cache_on(wire)
221 308 region = self._region(wire)
309
222 310 @region.conditional_cache_on_arguments(condition=cache_on)
223 311 def _blob_raw_length(_repo_id, _sha):
224 312
225 313 repo_init = self._factory.repo_libgit2(wire)
226 314 with repo_init as repo:
227 315 blob = repo[sha]
228 316 return blob.size
229 317
230 318 return _blob_raw_length(repo_id, sha)
231 319
232 320 def _parse_lfs_pointer(self, raw_content):
321 spec_string = b'version https://git-lfs.github.com/spec'
322 if raw_content and raw_content.startswith(spec_string):
233 323
234 spec_string = 'version https://git-lfs.github.com/spec'
235 if raw_content and raw_content.startswith(spec_string):
236 pattern = re.compile(r"""
324 pattern = re.compile(rb"""
237 325 (?:\n)?
238 326 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
239 327 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
240 328 ^size[ ](?P<oid_size>[0-9]+)\n
241 329 (?:\n)?
242 330 """, re.VERBOSE | re.MULTILINE)
243 331 match = pattern.match(raw_content)
244 332 if match:
245 333 return match.groupdict()
246 334
247 335 return {}
248 336
249 337 @reraise_safe_exceptions
250 338 def is_large_file(self, wire, commit_id):
251 339 cache_on, context_uid, repo_id = self._cache_on(wire)
340 region = self._region(wire)
252 341
253 region = self._region(wire)
254 342 @region.conditional_cache_on_arguments(condition=cache_on)
255 343 def _is_large_file(_repo_id, _sha):
256 344 repo_init = self._factory.repo_libgit2(wire)
257 345 with repo_init as repo:
258 346 blob = repo[commit_id]
259 347 if blob.is_binary:
260 348 return {}
261 349
262 350 return self._parse_lfs_pointer(blob.data)
263 351
264 352 return _is_large_file(repo_id, commit_id)
265 353
266 354 @reraise_safe_exceptions
267 355 def is_binary(self, wire, tree_id):
268 356 cache_on, context_uid, repo_id = self._cache_on(wire)
357 region = self._region(wire)
269 358
270 region = self._region(wire)
271 359 @region.conditional_cache_on_arguments(condition=cache_on)
272 360 def _is_binary(_repo_id, _tree_id):
273 361 repo_init = self._factory.repo_libgit2(wire)
274 362 with repo_init as repo:
275 363 blob_obj = repo[tree_id]
276 364 return blob_obj.is_binary
277 365
278 366 return _is_binary(repo_id, tree_id)
279 367
280 368 @reraise_safe_exceptions
369 def md5_hash(self, wire, commit_id, path):
370 cache_on, context_uid, repo_id = self._cache_on(wire)
371 region = self._region(wire)
372
373 @region.conditional_cache_on_arguments(condition=cache_on)
374 def _md5_hash(_repo_id, _commit_id, _path):
375 repo_init = self._factory.repo_libgit2(wire)
376 with repo_init as repo:
377 commit = repo[_commit_id]
378 blob_obj = commit.tree[_path]
379
380 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
381 raise exceptions.LookupException()(
382 f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')
383
384 return ''
385
386 return _md5_hash(repo_id, commit_id, path)
387
388 @reraise_safe_exceptions
281 389 def in_largefiles_store(self, wire, oid):
282 390 conf = self._wire_to_config(wire)
283 391 repo_init = self._factory.repo_libgit2(wire)
284 392 with repo_init as repo:
285 393 repo_name = repo.path
286 394
287 395 store_location = conf.get('vcs_git_lfs_store_location')
288 396 if store_location:
289 397
290 398 store = LFSOidStore(
291 399 oid=oid, repo=repo_name, store_location=store_location)
292 400 return store.has_oid()
293 401
294 402 return False
295 403
296 404 @reraise_safe_exceptions
297 405 def store_path(self, wire, oid):
298 406 conf = self._wire_to_config(wire)
299 407 repo_init = self._factory.repo_libgit2(wire)
300 408 with repo_init as repo:
301 409 repo_name = repo.path
302 410
303 411 store_location = conf.get('vcs_git_lfs_store_location')
304 412 if store_location:
305 413 store = LFSOidStore(
306 414 oid=oid, repo=repo_name, store_location=store_location)
307 415 return store.oid_path
308 raise ValueError('Unable to fetch oid with path {}'.format(oid))
416 raise ValueError(f'Unable to fetch oid with path {oid}')
309 417
310 418 @reraise_safe_exceptions
311 419 def bulk_request(self, wire, rev, pre_load):
312 420 cache_on, context_uid, repo_id = self._cache_on(wire)
313 421 region = self._region(wire)
422
314 423 @region.conditional_cache_on_arguments(condition=cache_on)
315 424 def _bulk_request(_repo_id, _rev, _pre_load):
316 425 result = {}
317 426 for attr in pre_load:
318 427 try:
319 428 method = self._bulk_methods[attr]
429 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
320 430 args = [wire, rev]
321 431 result[attr] = method(*args)
322 432 except KeyError as e:
323 raise exceptions.VcsException(e)(
324 "Unknown bulk attribute: %s" % attr)
433 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
325 434 return result
326 435
327 436 return _bulk_request(repo_id, rev, sorted(pre_load))
328 437
329 def _build_opener(self, url):
438 @reraise_safe_exceptions
439 def bulk_file_request(self, wire, commit_id, path, pre_load):
440 cache_on, context_uid, repo_id = self._cache_on(wire)
441 region = self._region(wire)
442
443 @region.conditional_cache_on_arguments(condition=cache_on)
444 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
445 result = {}
446 for attr in pre_load:
447 try:
448 method = self._bulk_file_methods[attr]
449 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
450 result[attr] = method(wire, _commit_id, _path)
451 except KeyError as e:
452 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
453 return result
454
455 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
456
457 def _build_opener(self, url: str):
330 458 handlers = []
331 url_obj = url_parser(url)
332 _, authinfo = url_obj.authinfo()
459 url_obj = url_parser(safe_bytes(url))
460 authinfo = url_obj.authinfo()[1]
333 461
334 462 if authinfo:
335 463 # create a password manager
336 passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
337 passmgr.add_password(*authinfo)
464 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
465 passmgr.add_password(*convert_to_str(authinfo))
338 466
339 467 handlers.extend((httpbasicauthhandler(passmgr),
340 468 httpdigestauthhandler(passmgr)))
341 469
342 return urllib2.build_opener(*handlers)
343
344 def _type_id_to_name(self, type_id):
345 return {
346 1: b'commit',
347 2: b'tree',
348 3: b'blob',
349 4: b'tag'
350 }[type_id]
470 return urllib.request.build_opener(*handlers)
351 471
352 472 @reraise_safe_exceptions
353 473 def check_url(self, url, config):
354 url_obj = url_parser(url)
355 test_uri, _ = url_obj.authinfo()
356 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
357 url_obj.query = obfuscate_qs(url_obj.query)
358 cleaned_uri = str(url_obj)
359 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
474 url_obj = url_parser(safe_bytes(url))
475
476 test_uri = safe_str(url_obj.authinfo()[0])
477 obfuscated_uri = get_obfuscated_url(url_obj)
478
479 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
360 480
361 481 if not test_uri.endswith('info/refs'):
362 482 test_uri = test_uri.rstrip('/') + '/info/refs'
363 483
364 o = self._build_opener(url)
484 o = self._build_opener(url=url)
365 485 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
366 486
367 487 q = {"service": 'git-upload-pack'}
368 qs = '?%s' % urllib.urlencode(q)
369 cu = "%s%s" % (test_uri, qs)
370 req = urllib2.Request(cu, None, {})
488 qs = f'?{urllib.parse.urlencode(q)}'
489 cu = f"{test_uri}{qs}"
371 490
372 491 try:
373 log.debug("Trying to open URL %s", cleaned_uri)
492 req = urllib.request.Request(cu, None, {})
493 log.debug("Trying to open URL %s", obfuscated_uri)
374 494 resp = o.open(req)
375 495 if resp.code != 200:
376 496 raise exceptions.URLError()('Return Code is not 200')
377 497 except Exception as e:
378 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
498 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
379 499 # means it cannot be cloned
380 raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
500 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
381 501
382 502 # now detect if it's proper git repo
383 gitdata = resp.read()
384 if 'service=git-upload-pack' in gitdata:
503 gitdata: bytes = resp.read()
504
505 if b'service=git-upload-pack' in gitdata:
385 506 pass
386 elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
387 # old style git can return some other format !
507 elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
508 # old style git can return some other format!
388 509 pass
389 510 else:
390 raise exceptions.URLError()(
391 "url [%s] does not look like an git" % (cleaned_uri,))
511 e = None
512 raise exceptions.URLError(e)(
513 f"url [{obfuscated_uri}] does not look like a git repo org_exc: {e}")
392 514
393 515 return True
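For illustration, the three-way classification applied to gitdata above, run against a fabricated pkt-line reply (only the sample payload is invented; the patterns are the ones from the code):

    import re

    sample = b'001e# service=git-upload-pack\n'
    if b'service=git-upload-pack' in sample:
        kind = 'smart-HTTP git endpoint'
    elif re.findall(br'[0-9a-fA-F]{40}\s+refs', sample):
        kind = 'dumb-HTTP git endpoint'  # old servers list "<sha> refs/..." lines
    else:
        kind = 'not a git repository'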
394 516
395 517 @reraise_safe_exceptions
396 518 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
397 519 # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
398 520 remote_refs = self.pull(wire, url, apply_refs=False)
399 521 repo = self._factory.repo(wire)
400 522 if isinstance(valid_refs, list):
401 523 valid_refs = tuple(valid_refs)
402 524
403 525 for k in remote_refs:
404 526 # only parse heads/tags and skip so called deferred tags
405 527 if k.startswith(valid_refs) and not k.endswith(deferred):
406 528 repo[k] = remote_refs[k]
407 529
408 530 if update_after_clone:
409 531 # we want to checkout HEAD
410 532 repo["HEAD"] = remote_refs["HEAD"]
411 533 index.build_index_from_tree(repo.path, repo.index_path(),
412 534 repo.object_store, repo["HEAD"].tree)
413 535
414 536 @reraise_safe_exceptions
415 537 def branch(self, wire, commit_id):
416 538 cache_on, context_uid, repo_id = self._cache_on(wire)
417 539 region = self._region(wire)
540
418 541 @region.conditional_cache_on_arguments(condition=cache_on)
419 542 def _branch(_context_uid, _repo_id, _commit_id):
420 543 regex = re.compile('^refs/heads')
421 544
422 545 def filter_with(ref):
423 546 return regex.match(ref[0]) and ref[1] == _commit_id
424 547
425 branches = filter(filter_with, self.get_refs(wire).items())
548 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
426 549 return [x[0].split('refs/heads/')[-1] for x in branches]
427 550
428 551 return _branch(context_uid, repo_id, commit_id)
429 552
430 553 @reraise_safe_exceptions
431 554 def commit_branches(self, wire, commit_id):
432 555 cache_on, context_uid, repo_id = self._cache_on(wire)
433 556 region = self._region(wire)
557
434 558 @region.conditional_cache_on_arguments(condition=cache_on)
435 559 def _commit_branches(_context_uid, _repo_id, _commit_id):
436 560 repo_init = self._factory.repo_libgit2(wire)
437 561 with repo_init as repo:
438 562 branches = [x for x in repo.branches.with_commit(_commit_id)]
439 563 return branches
440 564
441 565 return _commit_branches(context_uid, repo_id, commit_id)
442 566
443 567 @reraise_safe_exceptions
444 568 def add_object(self, wire, content):
445 569 repo_init = self._factory.repo_libgit2(wire)
446 570 with repo_init as repo:
447 571 blob = objects.Blob()
448 572 blob.set_raw_string(content)
449 573 repo.object_store.add_object(blob)
450 574 return blob.id
451 575
452 # TODO: this is quite complex, check if that can be simplified
576 @reraise_safe_exceptions
577 def create_commit(self, wire, author, committer, message, branch, new_tree_id,
578 date_args: list[int] | None = None,
579 parents: list | None = None):
580
581 repo_init = self._factory.repo_libgit2(wire)
582 with repo_init as repo:
583
584 if date_args:
585 current_time, offset = date_args
586
587 kw = {
588 'time': current_time,
589 'offset': offset
590 }
591 author = create_signature_from_string(author, **kw)
592 committer = create_signature_from_string(committer, **kw)
593
594 tree = new_tree_id
595 if isinstance(tree, (bytes, str)):
596 # validate this tree is in the repo...
597 tree = repo[safe_str(tree)].id
598
599 if parents:
600 # run via sha's and validate them in repo
601 parents = [repo[c].id for c in parents]
602 else:
603 parents = []
604 # ensure we COMMIT on top of given branch head
605 # check if this repo has ANY branches, otherwise this is a new-branch case we need to handle
606 if branch in repo.branches.local:
607 parents += [repo.branches[branch].target]
608 elif [x for x in repo.branches.local]:
609 parents += [repo.head.target]
610 #else:
611 # in case we want to commit on new branch we create it on top of HEAD
612 #repo.branches.local.create(branch, repo.revparse_single('HEAD'))
613
614 # Create a new commit
615 commit_oid = repo.create_commit(
616 f'refs/heads/{branch}', # the name of the reference to update
617 author, # the author of the commit
618 committer, # the committer of the commit
619 message, # the commit message
620 tree, # the tree produced by the index
621 parents # list of parents for the new commit, usually just one,
622 )
623
624 new_commit_id = safe_str(commit_oid)
625
626 return new_commit_id
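For context, create_signature_from_string with the time/offset kwargs above ultimately yields a pygit2 Signature; a minimal sketch with placeholder name and email, an arbitrary unix timestamp, and the offset in minutes east of UTC:

    import pygit2

    author = pygit2.Signature('Jane Doe', 'jane@example.com', time=1700000000, offset=60)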
627
453 628 @reraise_safe_exceptions
454 629 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
455 # Defines the root tree
456 class _Root(object):
457 def __repr__(self):
458 return 'ROOT TREE'
459 ROOT = _Root()
460 630
461 repo = self._factory.repo(wire)
462 object_store = repo.object_store
463
464 # Create tree and populates it with blobs
465
466 if commit_tree and repo[commit_tree]:
467 git_commit = repo[commit_data['parents'][0]]
468 commit_tree = repo[git_commit.tree] # root tree
469 else:
470 commit_tree = objects.Tree()
471
472 for node in updated:
473 # Compute subdirs if needed
474 dirpath, nodename = vcspath.split(node['path'])
475 dirnames = map(safe_str, dirpath and dirpath.split('/') or [])
476 parent = commit_tree
477 ancestors = [('', parent)]
631 def mode2pygit(mode):
632 """
633 git only supports two regular-file modes: 644 and 755
478 634
479 # Tries to dig for the deepest existing tree
480 while dirnames:
481 curdir = dirnames.pop(0)
482 try:
483 dir_id = parent[curdir][1]
484 except KeyError:
485 # put curdir back into dirnames and stops
486 dirnames.insert(0, curdir)
487 break
488 else:
489 # If found, updates parent
490 parent = repo[dir_id]
491 ancestors.append((curdir, parent))
492 # Now parent is deepest existing tree and we need to create
493 # subtrees for dirnames (in reverse order)
494 # [this only applies for nodes from added]
495 new_trees = []
635 0o100755 -> 33261
636 0o100644 -> 33188
637 """
638 return {
639 0o100644: pygit2.GIT_FILEMODE_BLOB,
640 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
641 0o120000: pygit2.GIT_FILEMODE_LINK
642 }.get(mode) or pygit2.GIT_FILEMODE_BLOB
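The decimal values quoted in the docstring are simply the libgit2 filemode constants; a quick sanity check, assuming pygit2 is importable:

    import pygit2

    assert pygit2.GIT_FILEMODE_BLOB == 0o100644 == 33188
    assert pygit2.GIT_FILEMODE_BLOB_EXECUTABLE == 0o100755 == 33261
    assert pygit2.GIT_FILEMODE_LINK == 0o120000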
496 643
497 blob = objects.Blob.from_string(node['content'])
644 repo_init = self._factory.repo_libgit2(wire)
645 with repo_init as repo:
646 repo_index = repo.index
498 647
499 if dirnames:
500 # If there are trees which should be created we need to build
501 # them now (in reverse order)
502 reversed_dirnames = list(reversed(dirnames))
503 curtree = objects.Tree()
504 curtree[node['node_path']] = node['mode'], blob.id
505 new_trees.append(curtree)
506 for dirname in reversed_dirnames[:-1]:
507 newtree = objects.Tree()
508 newtree[dirname] = (DIR_STAT, curtree.id)
509 new_trees.append(newtree)
510 curtree = newtree
511 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
512 else:
513 parent.add(name=node['node_path'], mode=node['mode'], hexsha=blob.id)
648 commit_parents = None
649 if commit_tree and commit_data['parents']:
650 commit_parents = commit_data['parents']
651 parent_commit = repo[commit_parents[0]]
652 repo_index.read_tree(parent_commit.tree)
514 653
515 new_trees.append(parent)
516 # Update ancestors
517 reversed_ancestors = reversed(
518 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
519 for parent, tree, path in reversed_ancestors:
520 parent[path] = (DIR_STAT, tree.id)
521 object_store.add_object(tree)
654 for pathspec in updated:
655 blob_id = repo.create_blob(pathspec['content'])
656 ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
657 repo_index.add(ie)
658
659 for pathspec in removed:
660 repo_index.remove(pathspec)
522 661
523 object_store.add_object(blob)
524 for tree in new_trees:
525 object_store.add_object(tree)
662 # Write changes to the index
663 repo_index.write()
664
665 # Create a tree from the updated index
666 written_commit_tree = repo_index.write_tree()
667
668 new_tree_id = written_commit_tree
526 669
527 for node_path in removed:
528 paths = node_path.split('/')
529 tree = commit_tree # start with top-level
530 trees = [{'tree': tree, 'path': ROOT}]
531 # Traverse deep into the forest...
532 # resolve final tree by iterating the path.
533 # e.g a/b/c.txt will get
534 # - root as tree then
535 # - 'a' as tree,
536 # - 'b' as tree,
537 # - stop at c as blob.
538 for path in paths:
539 try:
540 obj = repo[tree[path][1]]
541 if isinstance(obj, objects.Tree):
542 trees.append({'tree': obj, 'path': path})
543 tree = obj
544 except KeyError:
545 break
546 #PROBLEM:
547 """
548 We're not editing same reference tree object
549 """
550 # Cut down the blob and all rotten trees on the way back...
551 for path, tree_data in reversed(zip(paths, trees)):
552 tree = tree_data['tree']
553 tree.__delitem__(path)
554 # This operation edits the tree, we need to mark new commit back
670 author = commit_data['author']
671 committer = commit_data['committer']
672 message = commit_data['message']
673
674 date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
555 675
556 if len(tree) > 0:
557 # This tree still has elements - don't remove it or any
558 # of it's parents
559 break
560
561 object_store.add_object(commit_tree)
676 new_commit_id = self.create_commit(wire, author, committer, message, branch,
677 new_tree_id, date_args=date_args, parents=commit_parents)
562 678
563 # Create commit
564 commit = objects.Commit()
565 commit.tree = commit_tree.id
566 for k, v in commit_data.items():
567 setattr(commit, k, v)
568 object_store.add_object(commit)
679 # libgit2, ensure the branch is there and exists
680 self.create_branch(wire, branch, new_commit_id)
569 681
570 self.create_branch(wire, branch, commit.id)
682 # libgit2, set new ref to this created commit
683 self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
571 684
572 # dulwich set-ref
573 ref = 'refs/heads/%s' % branch
574 repo.refs[ref] = commit.id
575
576 return commit.id
685 return new_commit_id
577 686
578 687 @reraise_safe_exceptions
579 688 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
580 689 if url != 'default' and '://' not in url:
581 690 client = LocalGitClient(url)
582 691 else:
583 url_obj = url_parser(url)
692 url_obj = url_parser(safe_bytes(url))
584 693 o = self._build_opener(url)
585 url, _ = url_obj.authinfo()
694 url = url_obj.authinfo()[0]
586 695 client = HttpGitClient(base_url=url, opener=o)
587 696 repo = self._factory.repo(wire)
588 697
589 698 determine_wants = repo.object_store.determine_wants_all
699
590 700 if refs:
591 def determine_wants_requested(references):
592 return [references[r] for r in references if r in refs]
701 refs: list[bytes] = [ascii_bytes(x) for x in refs]
702
703 def determine_wants_requested(_remote_refs):
704 determined = []
705 for ref_name, ref_hash in _remote_refs.items():
706 bytes_ref_name = safe_bytes(ref_name)
707
708 if bytes_ref_name in refs:
709 bytes_ref_hash = safe_bytes(ref_hash)
710 determined.append(bytes_ref_hash)
711 return determined
712
713 # swap with our custom requested wants
593 714 determine_wants = determine_wants_requested
594 715
595 716 try:
596 717 remote_refs = client.fetch(
597 718 path=url, target=repo, determine_wants=determine_wants)
719
598 720 except NotGitRepository as e:
599 721 log.warning(
600 722 'Trying to fetch from "%s" failed, not a Git repository.', url)
601 723 # Exception can contain unicode which we convert
602 724 raise exceptions.AbortException(e)(repr(e))
603 725
604 726 # mikhail: client.fetch() returns all the remote refs, but fetches only
605 727 # refs filtered by `determine_wants` function. We need to filter result
606 728 # as well
607 729 if refs:
608 730 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
609 731
610 732 if apply_refs:
611 733 # TODO: johbo: Needs proper test coverage with a git repository
612 734 # that contains a tag object, so that we would end up with
613 735 # a peeled ref at this point.
614 736 for k in remote_refs:
615 737 if k.endswith(PEELED_REF_MARKER):
616 738 log.debug("Skipping peeled reference %s", k)
617 739 continue
618 740 repo[k] = remote_refs[k]
619 741
620 742 if refs and not update_after:
743 # update to ref
621 744 # mikhail: explicitly set the head to the last ref.
622 repo["HEAD"] = remote_refs[refs[-1]]
745 update_to_ref = refs[-1]
746 if isinstance(update_after, str):
747 update_to_ref = update_after
748
749 repo[HEAD_MARKER] = remote_refs[update_to_ref]
623 750
624 751 if update_after:
625 # we want to checkout HEAD
626 repo["HEAD"] = remote_refs["HEAD"]
752 # we want to check out HEAD
753 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
627 754 index.build_index_from_tree(repo.path, repo.index_path(),
628 repo.object_store, repo["HEAD"].tree)
755 repo.object_store, repo[HEAD_MARKER].tree)
756
757 if isinstance(remote_refs, FetchPackResult):
758 return remote_refs.refs
629 759 return remote_refs
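A sketch of the dulwich determine_wants contract relied on above: the callback receives the complete remote ref mapping (bytes ref names to bytes shas) and returns the object ids to fetch. This all-heads variant is illustrative only:

    def determine_wants_all_heads(remote_refs: dict[bytes, bytes]) -> list[bytes]:
        # fetch every branch head advertised by the remote
        return [sha for ref, sha in remote_refs.items()
                if ref.startswith(b'refs/heads/')]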
630 760
631 761 @reraise_safe_exceptions
632 def sync_fetch(self, wire, url, refs=None, all_refs=False):
633 repo = self._factory.repo(wire)
762 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
763 self._factory.repo(wire)
634 764 if refs and not isinstance(refs, (list, tuple)):
635 765 refs = [refs]
636 766
637 767 config = self._wire_to_config(wire)
638 768 # get all remote refs we'll use to fetch later
639 769 cmd = ['ls-remote']
640 770 if not all_refs:
641 771 cmd += ['--heads', '--tags']
642 772 cmd += [url]
643 773 output, __ = self.run_git_command(
644 774 wire, cmd, fail_on_stderr=False,
645 775 _copts=self._remote_conf(config),
646 776 extra_env={'GIT_TERMINAL_PROMPT': '0'})
647 777
648 778 remote_refs = collections.OrderedDict()
649 779 fetch_refs = []
650 780
651 781 for ref_line in output.splitlines():
652 sha, ref = ref_line.split('\t')
782 sha, ref = ref_line.split(b'\t')
653 783 sha = sha.strip()
654 784 if ref in remote_refs:
655 785 # duplicate, skip
656 786 continue
657 787 if ref.endswith(PEELED_REF_MARKER):
658 788 log.debug("Skipping peeled reference %s", ref)
659 789 continue
660 790 # don't sync HEAD
661 if ref in ['HEAD']:
791 if ref in [HEAD_MARKER]:
662 792 continue
663 793
664 794 remote_refs[ref] = sha
665 795
666 796 if refs and sha in refs:
667 797 # we filter fetch using our specified refs
668 fetch_refs.append('{}:{}'.format(ref, ref))
798 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
669 799 elif not refs:
670 fetch_refs.append('{}:{}'.format(ref, ref))
800 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
671 801 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
672 802
673 803 if fetch_refs:
674 for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
804 for chunk in more_itertools.chunked(fetch_refs, 128):
675 805 fetch_refs_chunks = list(chunk)
676 806 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
677 _out, _err = self.run_git_command(
807 self.run_git_command(
678 808 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
679 809 fail_on_stderr=False,
680 810 _copts=self._remote_conf(config),
681 811 extra_env={'GIT_TERMINAL_PROMPT': '0'})
812 if kwargs.get('sync_large_objects'):
813 self.run_git_command(
814 wire, ['lfs', 'fetch', url, '--all'],
815 fail_on_stderr=False,
816 _copts=self._remote_conf(config),
817 )
682 818
683 819 return remote_refs
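The fetch batching above uses more_itertools.chunked, which yields successive fixed-size lists; a toy illustration with fabricated refspecs:

    import more_itertools

    refspecs = [f'refs/heads/b{i}:refs/heads/b{i}' for i in range(5)]
    assert list(more_itertools.chunked(refspecs, 2)) == [
        refspecs[0:2], refspecs[2:4], refspecs[4:5],
    ]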
684 820
685 821 @reraise_safe_exceptions
686 def sync_push(self, wire, url, refs=None):
822 def sync_push(self, wire, url, refs=None, **kwargs):
687 823 if not self.check_url(url, wire):
688 824 return
689 825 config = self._wire_to_config(wire)
690 826 self._factory.repo(wire)
691 827 self.run_git_command(
692 828 wire, ['push', url, '--mirror'], fail_on_stderr=False,
693 829 _copts=self._remote_conf(config),
694 830 extra_env={'GIT_TERMINAL_PROMPT': '0'})
831 if kwargs.get('sync_large_objects'):
832 self.run_git_command(
833 wire, ['lfs', 'push', url, '--all'],
834 fail_on_stderr=False,
835 _copts=self._remote_conf(config),
836 )
695 837
696 838 @reraise_safe_exceptions
697 839 def get_remote_refs(self, wire, url):
698 840 repo = Repo(url)
699 841 return repo.get_refs()
700 842
701 843 @reraise_safe_exceptions
702 844 def get_description(self, wire):
703 845 repo = self._factory.repo(wire)
704 846 return repo.get_description()
705 847
706 848 @reraise_safe_exceptions
707 def get_missing_revs(self, wire, rev1, rev2, path2):
849 def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
850 origin_repo_path = wire['path']
708 851 repo = self._factory.repo(wire)
709 LocalGitClient(thin_packs=False).fetch(path2, repo)
852 # fetch from other_repo_path to our origin repo
853 LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)
710 854
711 855 wire_remote = wire.copy()
712 wire_remote['path'] = path2
856 wire_remote['path'] = other_repo_path
713 857 repo_remote = self._factory.repo(wire_remote)
714 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
858
859 # fetch from origin_repo_path to our remote repo
860 LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)
715 861
716 862 revs = [
717 863 x.commit.id
718 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
864 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
719 865 return revs
720 866
721 867 @reraise_safe_exceptions
722 868 def get_object(self, wire, sha, maybe_unreachable=False):
723 869 cache_on, context_uid, repo_id = self._cache_on(wire)
724 870 region = self._region(wire)
871
725 872 @region.conditional_cache_on_arguments(condition=cache_on)
726 873 def _get_object(_context_uid, _repo_id, _sha):
727 874 repo_init = self._factory.repo_libgit2(wire)
728 875 with repo_init as repo:
729 876
730 877 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
731 878 try:
732 879 commit = repo.revparse_single(sha)
733 880 except KeyError:
734 881 # NOTE(marcink): KeyError doesn't give us any meaningful information
735 882 # here, we instead give something more explicit
736 883 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
737 884 raise exceptions.LookupException(e)(missing_commit_err)
738 885 except ValueError as e:
739 886 raise exceptions.LookupException(e)(missing_commit_err)
740 887
741 888 is_tag = False
742 889 if isinstance(commit, pygit2.Tag):
743 890 commit = repo.get(commit.target)
744 891 is_tag = True
745 892
746 893 check_dangling = True
747 894 if is_tag:
748 895 check_dangling = False
749 896
750 897 if check_dangling and maybe_unreachable:
751 898 check_dangling = False
752 899
753 900 # we were given a reference and it resolved, so the commit is not dangling
754 901 if sha != commit.hex:
755 902 check_dangling = False
756 903
757 904 if check_dangling:
758 905 # check for dangling commit
759 906 for branch in repo.branches.with_commit(commit.hex):
760 907 if branch:
761 908 break
762 909 else:
763 910 # NOTE(marcink): Empty error doesn't give us any meaningful information
764 911 # here, we instead give something more explicit
765 912 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
766 913 raise exceptions.LookupException(e)(missing_commit_err)
767 914
768 915 commit_id = commit.hex
769 type_id = commit.type
916 type_str = commit.type_str
770 917
771 918 return {
772 919 'id': commit_id,
773 'type': self._type_id_to_name(type_id),
920 'type': type_str,
774 921 'commit_id': commit_id,
775 922 'idx': 0
776 923 }
777 924
778 925 return _get_object(context_uid, repo_id, sha)
779 926
780 927 @reraise_safe_exceptions
781 928 def get_refs(self, wire):
782 929 cache_on, context_uid, repo_id = self._cache_on(wire)
783 930 region = self._region(wire)
931
784 932 @region.conditional_cache_on_arguments(condition=cache_on)
785 933 def _get_refs(_context_uid, _repo_id):
786 934
787 935 repo_init = self._factory.repo_libgit2(wire)
788 936 with repo_init as repo:
789 937 regex = re.compile('^refs/(heads|tags)/')
790 938 return {x.name: x.target.hex for x in
791 filter(lambda ref: regex.match(ref.name) ,repo.listall_reference_objects())}
939 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
792 940
793 941 return _get_refs(context_uid, repo_id)
794 942
795 943 @reraise_safe_exceptions
796 944 def get_branch_pointers(self, wire):
797 945 cache_on, context_uid, repo_id = self._cache_on(wire)
798 946 region = self._region(wire)
947
799 948 @region.conditional_cache_on_arguments(condition=cache_on)
800 949 def _get_branch_pointers(_context_uid, _repo_id):
801 950
802 951 repo_init = self._factory.repo_libgit2(wire)
803 952 regex = re.compile('^refs/heads')
804 953 with repo_init as repo:
805 branches = filter(lambda ref: regex.match(ref.name), repo.listall_reference_objects())
954 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
806 955 return {x.target.hex: x.shorthand for x in branches}
807 956
808 957 return _get_branch_pointers(context_uid, repo_id)
809 958
810 959 @reraise_safe_exceptions
811 960 def head(self, wire, show_exc=True):
812 961 cache_on, context_uid, repo_id = self._cache_on(wire)
813 962 region = self._region(wire)
963
814 964 @region.conditional_cache_on_arguments(condition=cache_on)
815 965 def _head(_context_uid, _repo_id, _show_exc):
816 966 repo_init = self._factory.repo_libgit2(wire)
817 967 with repo_init as repo:
818 968 try:
819 969 return repo.head.peel().hex
820 970 except Exception:
821 971 if show_exc:
822 972 raise
823 973 return _head(context_uid, repo_id, show_exc)
824 974
825 975 @reraise_safe_exceptions
826 976 def init(self, wire):
827 repo_path = str_to_dulwich(wire['path'])
828 self.repo = Repo.init(repo_path)
977 repo_path = safe_str(wire['path'])
978 os.makedirs(repo_path, mode=0o755)
979 pygit2.init_repository(repo_path, bare=False)
829 980
830 981 @reraise_safe_exceptions
831 982 def init_bare(self, wire):
832 repo_path = str_to_dulwich(wire['path'])
833 self.repo = Repo.init_bare(repo_path)
983 repo_path = safe_str(wire['path'])
984 os.makedirs(repo_path, mode=0o755)
985 pygit2.init_repository(repo_path, bare=True)
834 986
835 987 @reraise_safe_exceptions
836 988 def revision(self, wire, rev):
837 989
838 990 cache_on, context_uid, repo_id = self._cache_on(wire)
839 991 region = self._region(wire)
992
840 993 @region.conditional_cache_on_arguments(condition=cache_on)
841 994 def _revision(_context_uid, _repo_id, _rev):
842 995 repo_init = self._factory.repo_libgit2(wire)
843 996 with repo_init as repo:
844 997 commit = repo[rev]
845 998 obj_data = {
846 999 'id': commit.id.hex,
847 1000 }
848 1001 # tree objects itself don't have tree_id attribute
849 1002 if hasattr(commit, 'tree_id'):
850 1003 obj_data['tree'] = commit.tree_id.hex
851 1004
852 1005 return obj_data
853 1006 return _revision(context_uid, repo_id, rev)
854 1007
855 1008 @reraise_safe_exceptions
856 1009 def date(self, wire, commit_id):
857 1010 cache_on, context_uid, repo_id = self._cache_on(wire)
858 1011 region = self._region(wire)
1012
859 1013 @region.conditional_cache_on_arguments(condition=cache_on)
860 1014 def _date(_repo_id, _commit_id):
861 1015 repo_init = self._factory.repo_libgit2(wire)
862 1016 with repo_init as repo:
863 1017 commit = repo[commit_id]
864 1018
865 1019 if hasattr(commit, 'commit_time'):
866 1020 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
867 1021 else:
868 1022 commit = commit.get_object()
869 1023 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
870 1024
871 1025 # TODO(marcink): check dulwich difference of offset vs timezone
872 1026 return [commit_time, commit_time_offset]
873 1027 return _date(repo_id, commit_id)
874 1028
875 1029 @reraise_safe_exceptions
876 1030 def author(self, wire, commit_id):
877 1031 cache_on, context_uid, repo_id = self._cache_on(wire)
878 1032 region = self._region(wire)
1033
879 1034 @region.conditional_cache_on_arguments(condition=cache_on)
880 1035 def _author(_repo_id, _commit_id):
881 1036 repo_init = self._factory.repo_libgit2(wire)
882 1037 with repo_init as repo:
883 1038 commit = repo[commit_id]
884 1039
885 1040 if hasattr(commit, 'author'):
886 1041 author = commit.author
887 1042 else:
888 1043 author = commit.get_object().author
889 1044
890 1045 if author.email:
891 return u"{} <{}>".format(author.name, author.email)
1046 return f"{author.name} <{author.email}>"
892 1047
893 1048 try:
894 return u"{}".format(author.name)
1049 return f"{author.name}"
895 1050 except Exception:
896 return u"{}".format(safe_unicode(author.raw_name))
1051 return f"{safe_str(author.raw_name)}"
897 1052
898 1053 return _author(repo_id, commit_id)
899 1054
900 1055 @reraise_safe_exceptions
901 1056 def message(self, wire, commit_id):
902 1057 cache_on, context_uid, repo_id = self._cache_on(wire)
903 1058 region = self._region(wire)
1059
904 1060 @region.conditional_cache_on_arguments(condition=cache_on)
905 1061 def _message(_repo_id, _commit_id):
906 1062 repo_init = self._factory.repo_libgit2(wire)
907 1063 with repo_init as repo:
908 1064 commit = repo[commit_id]
909 1065 return commit.message
910 1066 return _message(repo_id, commit_id)
911 1067
912 1068 @reraise_safe_exceptions
913 1069 def parents(self, wire, commit_id):
914 1070 cache_on, context_uid, repo_id = self._cache_on(wire)
915 1071 region = self._region(wire)
1072
916 1073 @region.conditional_cache_on_arguments(condition=cache_on)
917 1074 def _parents(_repo_id, _commit_id):
918 1075 repo_init = self._factory.repo_libgit2(wire)
919 1076 with repo_init as repo:
920 1077 commit = repo[commit_id]
921 1078 if hasattr(commit, 'parent_ids'):
922 1079 parent_ids = commit.parent_ids
923 1080 else:
924 1081 parent_ids = commit.get_object().parent_ids
925 1082
926 1083 return [x.hex for x in parent_ids]
927 1084 return _parents(repo_id, commit_id)
928 1085
929 1086 @reraise_safe_exceptions
930 1087 def children(self, wire, commit_id):
931 1088 cache_on, context_uid, repo_id = self._cache_on(wire)
932 1089 region = self._region(wire)
1090
1091 head = self.head(wire)
1092
933 1093 @region.conditional_cache_on_arguments(condition=cache_on)
934 1094 def _children(_repo_id, _commit_id):
1095
935 1096 output, __ = self.run_git_command(
936 wire, ['rev-list', '--all', '--children'])
1097 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
937 1098
938 1099 child_ids = []
939 pat = re.compile(r'^%s' % commit_id)
940 for l in output.splitlines():
941 if pat.match(l):
942 found_ids = l.split(' ')[1:]
1100 pat = re.compile(fr'^{commit_id}')
1101 for line in output.splitlines():
1102 line = safe_str(line)
1103 if pat.match(line):
1104 found_ids = line.split(' ')[1:]
943 1105 child_ids.extend(found_ids)
1106 break
944 1107
945 1108 return child_ids
946 1109 return _children(repo_id, commit_id)
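The parsing above consumes git rev-list --children output, where each line reads "<sha> <child-sha> [<child-sha> ...]"; a sketch with fabricated shas:

    line = 'aaaa1111 bbbb2222 cccc3333'
    parent_sha, *child_ids = line.split(' ')
    assert child_ids == ['bbbb2222', 'cccc3333']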
947 1110
948 1111 @reraise_safe_exceptions
949 1112 def set_refs(self, wire, key, value):
950 1113 repo_init = self._factory.repo_libgit2(wire)
951 1114 with repo_init as repo:
952 1115 repo.references.create(key, value, force=True)
953 1116
954 1117 @reraise_safe_exceptions
1118 def update_refs(self, wire, key, value):
1119 repo_init = self._factory.repo_libgit2(wire)
1120 with repo_init as repo:
1121 if key not in repo.references:
1122 raise ValueError(f'Reference {key} not found in the repository')
1123 repo.references.create(key, value, force=True)
1124
1125 @reraise_safe_exceptions
955 1126 def create_branch(self, wire, branch_name, commit_id, force=False):
956 1127 repo_init = self._factory.repo_libgit2(wire)
957 1128 with repo_init as repo:
958 commit = repo[commit_id]
1129 if commit_id:
1130 commit = repo[commit_id]
1131 else:
1132 # if commit is not given just use the HEAD
1133 commit = repo.head()
959 1134
960 1135 if force:
961 1136 repo.branches.local.create(branch_name, commit, force=force)
962 1137 elif not repo.branches.get(branch_name):
963 1138 # create only if that branch isn't existing
964 1139 repo.branches.local.create(branch_name, commit, force=force)
965 1140
966 1141 @reraise_safe_exceptions
967 1142 def remove_ref(self, wire, key):
968 1143 repo_init = self._factory.repo_libgit2(wire)
969 1144 with repo_init as repo:
970 1145 repo.references.delete(key)
971 1146
972 1147 @reraise_safe_exceptions
973 1148 def tag_remove(self, wire, tag_name):
974 1149 repo_init = self._factory.repo_libgit2(wire)
975 1150 with repo_init as repo:
976 key = 'refs/tags/{}'.format(tag_name)
1151 key = f'refs/tags/{tag_name}'
977 1152 repo.references.delete(key)
978 1153
979 1154 @reraise_safe_exceptions
980 1155 def tree_changes(self, wire, source_id, target_id):
981 # TODO(marcink): remove this seems it's only used by tests
982 1156 repo = self._factory.repo(wire)
1157 # source can be empty
1158 source_id = safe_bytes(source_id if source_id else b'')
1159 target_id = safe_bytes(target_id)
1160
983 1161 source = repo[source_id].tree if source_id else None
984 1162 target = repo[target_id].tree
985 1163 result = repo.object_store.tree_changes(source, target)
986 return list(result)
1164
1165 added = set()
1166 modified = set()
1167 deleted = set()
1168 for (old_path, new_path), (_, _), (_, _) in list(result):
1169 if new_path and old_path:
1170 modified.add(new_path)
1171 elif new_path and not old_path:
1172 added.add(new_path)
1173 elif not new_path and old_path:
1174 deleted.add(old_path)
1175
1176 return list(added), list(modified), list(deleted)
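Each entry yielded by dulwich's object_store.tree_changes is ((old_path, new_path), (old_mode, new_mode), (old_sha, new_sha)), so the classification above only asks which side of the path pair is missing; a fabricated example:

    change = ((None, b'docs/readme.rst'), (None, 0o100644), (None, b'1' * 40))
    (old_path, new_path), _modes, _shas = change
    kind = 'added' if not old_path else ('deleted' if not new_path else 'modified')
    assert kind == 'added'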
987 1177
988 1178 @reraise_safe_exceptions
989 1179 def tree_and_type_for_path(self, wire, commit_id, path):
990 1180
991 1181 cache_on, context_uid, repo_id = self._cache_on(wire)
992 1182 region = self._region(wire)
1183
993 1184 @region.conditional_cache_on_arguments(condition=cache_on)
994 1185 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
995 1186 repo_init = self._factory.repo_libgit2(wire)
996 1187
997 1188 with repo_init as repo:
998 1189 commit = repo[commit_id]
999 1190 try:
1000 1191 tree = commit.tree[path]
1001 1192 except KeyError:
1002 1193 return None, None, None
1003 1194
1004 return tree.id.hex, tree.type, tree.filemode
1195 return tree.id.hex, tree.type_str, tree.filemode
1005 1196 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1006 1197
1007 1198 @reraise_safe_exceptions
1008 1199 def tree_items(self, wire, tree_id):
1009 1200 cache_on, context_uid, repo_id = self._cache_on(wire)
1010 1201 region = self._region(wire)
1202
1011 1203 @region.conditional_cache_on_arguments(condition=cache_on)
1012 1204 def _tree_items(_repo_id, _tree_id):
1013 1205
1014 1206 repo_init = self._factory.repo_libgit2(wire)
1015 1207 with repo_init as repo:
1016 1208 try:
1017 1209 tree = repo[tree_id]
1018 1210 except KeyError:
1019 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1211 raise ObjectMissing(f'No tree with id: {tree_id}')
1020 1212
1021 1213 result = []
1022 1214 for item in tree:
1023 1215 item_sha = item.hex
1024 1216 item_mode = item.filemode
1025 item_type = item.type
1217 item_type = item.type_str
1026 1218
1027 1219 if item_type == 'commit':
1028 1220 # NOTE(marcink): submodules we translate to 'link' for backward compat
1029 1221 item_type = 'link'
1030 1222
1031 1223 result.append((item.name, item_mode, item_sha, item_type))
1032 1224 return result
1033 1225 return _tree_items(repo_id, tree_id)
1034 1226
1035 1227 @reraise_safe_exceptions
1036 1228 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1037 1229 """
1038 1230 Old version that uses subprocess to call diff
1039 1231 """
1040 1232
1041 1233 flags = [
1042 '-U%s' % context, '--patch',
1234 f'-U{context}', '--patch',
1043 1235 '--binary',
1044 1236 '--find-renames',
1045 1237 '--no-indent-heuristic',
1046 1238 # '--indent-heuristic',
1047 1239 #'--full-index',
1048 1240 #'--abbrev=40'
1049 1241 ]
1050 1242
1051 1243 if opt_ignorews:
1052 1244 flags.append('--ignore-all-space')
1053 1245
1054 1246 if commit_id_1 == self.EMPTY_COMMIT:
1055 1247 cmd = ['show'] + flags + [commit_id_2]
1056 1248 else:
1057 1249 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1058 1250
1059 1251 if file_filter:
1060 1252 cmd.extend(['--', file_filter])
1061 1253
1062 1254 diff, __ = self.run_git_command(wire, cmd)
1063 1255 # If we used 'show' command, strip first few lines (until actual diff
1064 1256 # starts)
1065 1257 if commit_id_1 == self.EMPTY_COMMIT:
1066 1258 lines = diff.splitlines()
1067 1259 x = 0
1068 1260 for line in lines:
1069 if line.startswith('diff'):
1261 if line.startswith(b'diff'):
1070 1262 break
1071 1263 x += 1
1072 1264 # Append a newline, just like the 'diff' command does
1073 1265 diff = b'\n'.join(lines[x:]) + b'\n'
1074 1266 return diff
1075 1267
1076 1268 @reraise_safe_exceptions
1077 1269 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1078 1270 repo_init = self._factory.repo_libgit2(wire)
1271
1079 1272 with repo_init as repo:
1080 1273 swap = True
1081 1274 flags = 0
1082 1275 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1083 1276
1084 1277 if opt_ignorews:
1085 1278 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1086 1279
1087 1280 if commit_id_1 == self.EMPTY_COMMIT:
1088 1281 comm1 = repo[commit_id_2]
1089 1282 diff_obj = comm1.tree.diff_to_tree(
1090 1283 flags=flags, context_lines=context, swap=swap)
1091 1284
1092 1285 else:
1093 1286 comm1 = repo[commit_id_2]
1094 1287 comm2 = repo[commit_id_1]
1095 1288 diff_obj = comm1.tree.diff_to_tree(
1096 1289 comm2.tree, flags=flags, context_lines=context, swap=swap)
1097 1290 similar_flags = 0
1098 1291 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1099 1292 diff_obj.find_similar(flags=similar_flags)
1100 1293
1101 1294 if file_filter:
1102 1295 for p in diff_obj:
1103 1296 if p.delta.old_file.path == file_filter:
1104 return p.patch or ''
1297 return BytesEnvelope(p.data) or BytesEnvelope(b'')
1105 1298 # no matching path == no diff
1106 return ''
1107 return diff_obj.patch or ''
1299 return BytesEnvelope(b'')
1300
1301 return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1108 1302
1109 1303 @reraise_safe_exceptions
1110 1304 def node_history(self, wire, commit_id, path, limit):
1111 1305 cache_on, context_uid, repo_id = self._cache_on(wire)
1112 1306 region = self._region(wire)
1307
1113 1308 @region.conditional_cache_on_arguments(condition=cache_on)
1114 1309 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1115 1310 # optimize for n==1, rev-list is much faster for that use-case
1116 1311 if limit == 1:
1117 1312 cmd = ['rev-list', '-1', commit_id, '--', path]
1118 1313 else:
1119 1314 cmd = ['log']
1120 1315 if limit:
1121 1316 cmd.extend(['-n', str(safe_int(limit, 0))])
1122 1317 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1123 1318
1124 1319 output, __ = self.run_git_command(wire, cmd)
1125 commit_ids = re.findall(r'[0-9a-fA-F]{40}', output)
1320 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1126 1321
1127 1322 return [x for x in commit_ids]
1128 1323 return _node_history(context_uid, repo_id, commit_id, path, limit)
1129 1324
1130 1325 @reraise_safe_exceptions
1131 def node_annotate(self, wire, commit_id, path):
1132
1326 def node_annotate_legacy(self, wire, commit_id, path):
1327 # note: replaced by pygit2 implementation
1133 1328 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1134 1329 # -l ==> outputs long shas (and we need all 40 characters)
1135 1330 # --root ==> doesn't put '^' character for boundaries
1136 1331 # -r commit_id ==> blames for the given commit
1137 1332 output, __ = self.run_git_command(wire, cmd)
1138 1333
1139 1334 result = []
1140 for i, blame_line in enumerate(output.split('\n')[:-1]):
1335 for i, blame_line in enumerate(output.splitlines()[:-1]):
1141 1336 line_no = i + 1
1142 commit_id, line = re.split(r' ', blame_line, 1)
1143 result.append((line_no, commit_id, line))
1337 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1338 result.append((line_no, blame_commit_id, line))
1339
1144 1340 return result
1145 1341
1146 1342 @reraise_safe_exceptions
1147 def update_server_info(self, wire):
1148 repo = self._factory.repo(wire)
1149 update_server_info(repo)
1343 def node_annotate(self, wire, commit_id, path):
1344
1345 result_libgit = []
1346 repo_init = self._factory.repo_libgit2(wire)
1347 with repo_init as repo:
1348 commit = repo[commit_id]
1349 blame_obj = repo.blame(path, newest_commit=commit_id)
1350 for i, line in enumerate(commit.tree[path].data.splitlines()):
1351 line_no = i + 1
1352 hunk = blame_obj.for_line(line_no)
1353 blame_commit_id = hunk.final_commit_id.hex
1354
1355 result_libgit.append((line_no, blame_commit_id, line))
1356
1357 return BinaryEnvelope(result_libgit)
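A standalone sketch of the pygit2 blame API used above; the repository path and file name are hypothetical:

    import pygit2

    repo = pygit2.Repository('/path/to/repo')
    blame = repo.blame('README.rst', newest_commit=repo.head.target)
    hunk = blame.for_line(1)            # line numbers are 1-based
    print(hunk.final_commit_id)         # Oid of the commit that last touched line 1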
1358
1359 @reraise_safe_exceptions
1360 def update_server_info(self, wire, force=False):
1361 cmd = ['update-server-info']
1362 if force:
1363 cmd += ['--force']
1364 output, __ = self.run_git_command(wire, cmd)
1365 return output.splitlines()
1150 1366
1151 1367 @reraise_safe_exceptions
1152 1368 def get_all_commit_ids(self, wire):
1153 1369
1154 1370 cache_on, context_uid, repo_id = self._cache_on(wire)
1155 1371 region = self._region(wire)
1372
1156 1373 @region.conditional_cache_on_arguments(condition=cache_on)
1157 1374 def _get_all_commit_ids(_context_uid, _repo_id):
1158 1375
1159 1376 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1160 1377 try:
1161 1378 output, __ = self.run_git_command(wire, cmd)
1162 1379 return output.splitlines()
1163 1380 except Exception:
1164 1381 # Can be raised for empty repositories
1165 1382 return []
1383
1384 @region.conditional_cache_on_arguments(condition=cache_on)
1385 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1386 repo_init = self._factory.repo_libgit2(wire)
1387 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1388 results = []
1389 with repo_init as repo:
1390 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1391 results.append(commit.id.hex)
1392 return results
1166 1393 return _get_all_commit_ids(context_uid, repo_id)
1167 1394
1168 1395 @reraise_safe_exceptions
1169 1396 def run_git_command(self, wire, cmd, **opts):
1170 1397 path = wire.get('path', None)
1398 debug_mode = rhodecode.ConfigGet().get_bool('debug')
1171 1399
1172 1400 if path and os.path.isdir(path):
1173 1401 opts['cwd'] = path
1174 1402
1175 1403 if '_bare' in opts:
1176 1404 _copts = []
1177 1405 del opts['_bare']
1178 1406 else:
1179 _copts = ['-c', 'core.quotepath=false', ]
1407 _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
1180 1408 safe_call = False
1181 1409 if '_safe' in opts:
1182 1410 # no exc on failure
1183 1411 del opts['_safe']
1184 1412 safe_call = True
1185 1413
1186 1414 if '_copts' in opts:
1187 1415 _copts.extend(opts['_copts'] or [])
1188 1416 del opts['_copts']
1189 1417
1190 1418 gitenv = os.environ.copy()
1191 1419 gitenv.update(opts.pop('extra_env', {}))
1192 1420 # need to clean fix GIT_DIR !
1193 1421 if 'GIT_DIR' in gitenv:
1194 1422 del gitenv['GIT_DIR']
1195 1423 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1196 1424 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1197 1425
1198 1426 cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
1199 1427 _opts = {'env': gitenv, 'shell': False}
1200 1428
1201 1429 proc = None
1202 1430 try:
1203 1431 _opts.update(opts)
1204 1432 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1205 1433
1206 return ''.join(proc), ''.join(proc.error)
1207 except (EnvironmentError, OSError) as err:
1208 cmd = ' '.join(cmd) # human friendly CMD
1209 tb_err = ("Couldn't run git command (%s).\n"
1210 "Original error was:%s\n"
1211 "Call options:%s\n"
1212 % (cmd, err, _opts))
1434 return b''.join(proc), b''.join(proc.stderr)
1435 except OSError as err:
1436 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1437 call_opts = {}
1438 if debug_mode:
1439 call_opts = _opts
1440
1441 tb_err = ("Couldn't run git command ({}).\n"
1442 "Original error was:{}\n"
1443 "Call options:{}\n"
1444 .format(cmd, err, call_opts))
1213 1445 log.exception(tb_err)
1214 1446 if safe_call:
1215 1447 return b'', err
1216 1448 else:
1217 1449 raise exceptions.VcsException()(tb_err)
1218 1450 finally:
1219 1451 if proc:
1220 1452 proc.close()
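The environment handling above, isolated as a sketch (the helper name is invented): GIT_DIR is dropped so git resolves the repository from the working directory, global config is ignored, and callers pass extras such as GIT_TERMINAL_PROMPT via extra_env.

    import os

    def _git_env(extra: dict | None = None) -> dict:
        env = os.environ.copy()
        env.pop('GIT_DIR', None)              # resolve the repo from cwd instead
        env['GIT_CONFIG_NOGLOBAL'] = '1'      # ignore the user's global config
        env['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
        env.update(extra or {})               # e.g. {'GIT_TERMINAL_PROMPT': '0'}
        return env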
1221 1453
1222 1454 @reraise_safe_exceptions
1223 1455 def install_hooks(self, wire, force=False):
1224 1456 from vcsserver.hook_utils import install_git_hooks
1225 1457 bare = self.bare(wire)
1226 1458 path = wire['path']
1459 binary_dir = settings.BINARY_DIR
1460 if binary_dir:
1461 os.path.join(binary_dir, 'python3')
1227 1462 return install_git_hooks(path, bare, force_create=force)
1228 1463
1229 1464 @reraise_safe_exceptions
1230 1465 def get_hooks_info(self, wire):
1231 1466 from vcsserver.hook_utils import (
1232 1467 get_git_pre_hook_version, get_git_post_hook_version)
1233 1468 bare = self.bare(wire)
1234 1469 path = wire['path']
1235 1470 return {
1236 1471 'pre_version': get_git_pre_hook_version(path, bare),
1237 1472 'post_version': get_git_post_hook_version(path, bare),
1238 1473 }
1239 1474
1240 1475 @reraise_safe_exceptions
1241 1476 def set_head_ref(self, wire, head_name):
1242 1477 log.debug('Setting refs/head to `%s`', head_name)
1243 cmd = ['symbolic-ref', 'HEAD', 'refs/heads/%s' % head_name]
1244 output, __ = self.run_git_command(wire, cmd)
1245 return [head_name] + output.splitlines()
1478 repo_init = self._factory.repo_libgit2(wire)
1479 with repo_init as repo:
1480 repo.set_head(f'refs/heads/{head_name}')
1481
1482 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1246 1483
1247 1484 @reraise_safe_exceptions
1248 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1249 archive_dir_name, commit_id):
1485 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1486 archive_dir_name, commit_id, cache_config):
1250 1487
1251 1488 def file_walker(_commit_id, path):
1252 1489 repo_init = self._factory.repo_libgit2(wire)
1253 1490
1254 1491 with repo_init as repo:
1255 1492 commit = repo[commit_id]
1256 1493
1257 1494 if path in ['', '/']:
1258 1495 tree = commit.tree
1259 1496 else:
1260 1497 tree = commit.tree[path.rstrip('/')]
1261 1498 tree_id = tree.id.hex
1262 1499 try:
1263 1500 tree = repo[tree_id]
1264 1501 except KeyError:
1265 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1502 raise ObjectMissing(f'No tree with id: {tree_id}')
1266 1503
1267 1504 index = LibGit2Index.Index()
1268 1505 index.read_tree(tree)
1269 1506 file_iter = index
1270 1507
1271 for fn in file_iter:
1272 file_path = fn.path
1273 mode = fn.mode
1508 for file_node in file_iter:
1509 file_path = file_node.path
1510 mode = file_node.mode
1274 1511 is_link = stat.S_ISLNK(mode)
1275 1512 if mode == pygit2.GIT_FILEMODE_COMMIT:
1276 1513 log.debug('Skipping path %s as a commit node', file_path)
1277 1514 continue
1278 yield ArchiveNode(file_path, mode, is_link, repo[fn.hex].read_raw)
1515 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1279 1516
1280 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1281 archive_dir_name, commit_id)
1517 return store_archive_in_cache(
1518 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
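The symlink detection in the walker relies on standard filemode bits; libgit2's link mode satisfies stat.S_ISLNK while plain blobs do not:

    import stat

    assert stat.S_ISLNK(0o120000)        # symlink filemode
    assert not stat.S_ISLNK(0o100644)    # regular blob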
1 NO CONTENT: file renamed from vcsserver/hg.py to vcsserver/remote/hg_remote.py
1 NO CONTENT: file renamed from vcsserver/svn.py to vcsserver/remote/svn_remote.py