##// END OF EJS Templates
merged default branch into stable
super-admin -
r1219:e9ee7632 merge stable
parent child Browse files
Show More

The requested changes are too big and content was truncated. Show full diff

@@ -0,0 +1,53 b''
1 ; #####################
2 ; LOGGING CONFIGURATION
3 ; #####################
4 ; Logging template, used to configure logging
5 ; some variables here are replaced by RhodeCode with default values
6
7 [loggers]
8 keys = root, vcsserver
9
10 [handlers]
11 keys = console
12
13 [formatters]
14 keys = generic, json
15
16 ; #######
17 ; LOGGERS
18 ; #######
19 [logger_root]
20 level = NOTSET
21 handlers = console
22
23 [logger_vcsserver]
24 level = $RC_LOGGING_LEVEL
25 handlers =
26 qualname = vcsserver
27 propagate = 1
28
29 ; ########
30 ; HANDLERS
31 ; ########
32
33 [handler_console]
34 class = StreamHandler
35 args = (sys.stderr, )
36 level = $RC_LOGGING_LEVEL
37 ; To enable JSON formatted logs replace generic with json
38 ; This allows sending properly formatted logs to grafana loki or elasticsearch
39 #formatter = json
40 #formatter = generic
41 formatter = $RC_LOGGING_FORMATTER
42
43 ; ##########
44 ; FORMATTERS
45 ; ##########
46
47 [formatter_generic]
48 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
49 datefmt = %Y-%m-%d %H:%M:%S
50
51 [formatter_json]
52 format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
53 class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
@@ -0,0 +1,73 b''
1 [build-system]
2 requires = ["setuptools>=61.0.0", "wheel"]
3 build-backend = "setuptools.build_meta"
4
5 [project]
6 name = "rhodecode-vcsserver"
7 description = "Version Control System Server for RhodeCode"
8 authors = [
9 {name = "RhodeCode GmbH", email = "support@rhodecode.com"},
10 ]
11
12 license = {text = "GPL V3"}
13 requires-python = ">=3.10"
14 dynamic = ["version", "readme", "dependencies", "optional-dependencies"]
15 classifiers = [
16 'Development Status :: 6 - Mature',
17 'Intended Audience :: Developers',
18 'Operating System :: OS Independent',
19 'Topic :: Software Development :: Version Control',
20 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
21 'Programming Language :: Python :: 3.10',
22 ]
23
24 [project.entry-points."paste.app_factory"]
25 main = "vcsserver.http_main:main"
26
27
28 [tool.setuptools]
29 packages = ["vcsserver"]
30
31 [tool.setuptools.dynamic]
32 readme = {file = ["README.rst"], content-type = "text/rst"}
33 version = {file = "vcsserver/VERSION"}
34 dependencies = {file = ["requirements.txt"]}
35 optional-dependencies.tests = {file = ["requirements_test.txt"]}
36
37 [tool.ruff]
38
39 select = [
40 # Pyflakes
41 "F",
42 # Pycodestyle
43 "E",
44 "W",
45 # isort
46 "I001"
47 ]
48
49 ignore = [
50 "E501", # line too long, handled by black
51 ]
52
53 # Same as Black.
54 line-length = 120
55
56 [tool.ruff.isort]
57
58 known-first-party = ["vcsserver"]
59
60 [tool.ruff.format]
61
62 # Like Black, use double quotes for strings.
63 quote-style = "double"
64
65 # Like Black, indent with spaces, rather than tabs.
66 indent-style = "space"
67
68 # Like Black, respect magic trailing commas.
69 skip-magic-trailing-comma = false
70
71 # Like Black, automatically detect the appropriate line ending.
72 line-ending = "auto"
73
@@ -0,0 +1,1 b''
1 # Copyright (C) 2014-2023 RhodeCode GmbH
@@ -0,0 +1,27 b''
1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
# Hook identifiers, written as ``<hook type>.<hook name>``.
HOOK_REPO_SIZE = 'changegroup.repo_size'

# HG: pull related hooks
HOOK_PRE_PULL = 'preoutgoing.pre_pull'
HOOK_PULL = 'outgoing.pull_logger'

# HG: push related hooks
HOOK_PRE_PUSH = 'prechangegroup.pre_push'
HOOK_PRETX_PUSH = 'pretxnchangegroup.pre_push'
HOOK_PUSH = 'changegroup.push_logger'
HOOK_PUSH_KEY = 'pushkey.key_push'
@@ -0,0 +1,168 b''
1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import os
20 import textwrap
21 import string
22 import functools
23 import logging
24 import tempfile
25 import logging.config
26
27 from vcsserver.type_utils import str2bool, aslist
28
29 log = logging.getLogger(__name__)
30
31 # skip keys, that are set here, so we don't double process those
32 set_keys = {
33 '__file__': ''
34 }
35
36
class SettingsMaker:
    """
    Helper around an application settings mapping.

    It converts raw setting values to typed values and lets ``RC_*``
    environment variables take priority over values stored in the mapping.
    Keys that were overridden from the environment are recorded in the
    module-level ``set_keys`` dict.
    """

    def __init__(self, app_settings):
        """
        :param app_settings: mutable mapping of settings; updated in place.
        """
        self.settings = app_settings

    @classmethod
    def _bool_func(cls, input_val):
        """Convert a str/bytes value to bool using ``str2bool``."""
        if isinstance(input_val, bytes):
            # decode to str
            input_val = input_val.decode('utf8')
        return str2bool(input_val)

    @classmethod
    def _int_func(cls, input_val):
        """Convert the value with ``int()``."""
        return int(input_val)

    @classmethod
    def _list_func(cls, input_val, sep=','):
        """Split the value into a list using ``aslist`` and the given separator."""
        return aslist(input_val, sep=sep)

    @classmethod
    def _string_func(cls, input_val, lower=True):
        """Return the value, lower-cased when ``lower`` is true."""
        if lower:
            input_val = input_val.lower()
        return input_val

    @classmethod
    def _float_func(cls, input_val):
        """Convert the value with ``float()``."""
        return float(input_val)

    @classmethod
    def _dir_func(cls, input_val, ensure_dir=False, mode=0o755):
        """
        Validate that ``input_val`` is an existing directory.

        :param ensure_dir: create the directory (and parents) when missing.
        :param mode: permission bits passed to ``os.makedirs``.
        :raises Exception: when the directory does not exist afterwards.
        """
        # ensure we have our dir created
        if not os.path.isdir(input_val) and ensure_dir:
            os.makedirs(input_val, mode=mode, exist_ok=True)

        if not os.path.isdir(input_val):
            raise Exception(f'Dir at {input_val} does not exist')
        return input_val

    @classmethod
    def _file_path_func(cls, input_val, ensure_dir=False, mode=0o755):
        """
        Validate that the parent directory of the file path ``input_val`` exists.

        :param ensure_dir: create the parent directory when missing.
        :param mode: permission bits used when the directory is created.
        """
        dirname = os.path.dirname(input_val)
        # forward ``mode`` so a caller-supplied value is actually honoured
        cls._dir_func(dirname, ensure_dir=ensure_dir, mode=mode)
        return input_val

    @classmethod
    def _key_transformator(cls, key):
        """Map a settings key to its env variable name, e.g. ``a.b-c`` -> ``RC_A_B_C``."""
        return "{}_{}".format('RC'.upper(), key.upper().replace('.', '_').replace('-', '_'))

    def maybe_env_key(self, key):
        """
        Look up the environment variable matching ``key``.

        :return: the environment value, or None when the variable is not set.
        """
        # now maybe we have this KEY in env, search and use the value with higher priority.
        transformed_key = self._key_transformator(key)
        envvar_value = os.environ.get(transformed_key)
        if envvar_value:
            log.debug('using `%s` key instead of `%s` key for config', transformed_key, key)

        return envvar_value

    def env_expand(self):
        """
        Replace every setting that has a non-empty environment override and
        was not already processed (tracked in ``set_keys``).
        """
        replaced = {}
        for k, v in self.settings.items():
            if k not in set_keys:
                envvar_value = self.maybe_env_key(k)
                if envvar_value:
                    replaced[k] = envvar_value
                    set_keys[k] = envvar_value

        # replace ALL keys updated
        self.settings.update(replaced)

    def enable_logging(self, logging_conf=None, level='INFO', formatter='generic'):
        """
        Configure logging from an ini template file.

        ``$RC_LOGGING_LEVEL`` and ``$RC_LOGGING_FORMATTER`` placeholders in the
        template are substituted from the environment, falling back to
        ``level`` / ``formatter``. Nothing happens unless the
        ``logging.autoconfigure`` setting is enabled.

        :param logging_conf: path to the template; defaults to the
            ``logging.logging_conf_file`` setting.
        :param level: log level used when ``RC_LOGGING_LEVEL`` is unset/empty.
        :param formatter: formatter used when ``RC_LOGGING_FORMATTER`` is unset/empty.
        """

        if not str2bool(self.settings.get('logging.autoconfigure')):
            log.info('logging configuration based on main .ini file')
            return

        if logging_conf is None:
            logging_conf = self.settings.get('logging.logging_conf_file') or ''

        if not os.path.isfile(logging_conf):
            log.error('Unable to setup logging based on %s, '
                      'file does not exist.... specify path using logging.logging_conf_file= config setting. ', logging_conf)
            return

        with open(logging_conf, 'rt') as f:
            ini_template = textwrap.dedent(f.read())
        ini_template = string.Template(ini_template).safe_substitute(
            RC_LOGGING_LEVEL=os.environ.get('RC_LOGGING_LEVEL', '') or level,
            RC_LOGGING_FORMATTER=os.environ.get('RC_LOGGING_FORMATTER', '') or formatter
        )

        # The rendered template is ``str``; NamedTemporaryFile defaults to binary
        # mode ('w+b'), so text mode must be requested explicitly.
        tmp_conf = tempfile.NamedTemporaryFile(
            mode='wt', prefix='rc_logging_', suffix='.ini', delete=False)
        try:
            with tmp_conf:
                log.info('Saved Temporary LOGGING config at %s', tmp_conf.name)
                tmp_conf.write(ini_template)

            logging.config.fileConfig(tmp_conf.name)
        finally:
            # always clean up, also when writing or fileConfig fails
            os.remove(tmp_conf.name)

    def make_setting(self, key, default, lower=False, default_when_empty=False, parser=None):
        """
        Read ``key`` from the settings, convert it and store the result back.

        A non-empty environment override (see ``maybe_env_key``) wins over the
        configured value and is recorded in ``set_keys``.

        :param key: settings key.
        :param default: value used when the key is missing.
        :param lower: lower-case the value (``string`` parser only).
        :param default_when_empty: also use ``default`` when the key is set but empty.
        :param parser: one of ``bool``, ``int``, ``list``, ``list:newline``,
            ``list:spacesep``, ``string``, ``dir``, ``dir:ensured``, ``file``,
            ``file:ensured`` or None for no conversion.
        :raises KeyError: for an unknown ``parser`` name.
        :return: the converted value.
        """
        input_val = self.settings.get(key, default)

        if default_when_empty and not input_val:
            # use default value when value is set in the config but it is empty
            input_val = default

        parser_func = {
            'bool': self._bool_func,
            'int': self._int_func,
            'list': self._list_func,
            # split on real newline characters (was the literal two chars '/n')
            'list:newline': functools.partial(self._list_func, sep='\n'),
            'list:spacesep': functools.partial(self._list_func, sep=' '),
            'string': functools.partial(self._string_func, lower=lower),
            'dir': self._dir_func,
            'dir:ensured': functools.partial(self._dir_func, ensure_dir=True),
            'file': self._file_path_func,
            'file:ensured': functools.partial(self._file_path_func, ensure_dir=True),
            None: lambda i: i
        }[parser]

        envvar_value = self.maybe_env_key(key)
        if envvar_value:
            input_val = envvar_value
            set_keys[key] = input_val

        self.settings[key] = parser_func(input_val)
        return self.settings[key]
@@ -0,0 +1,243 b''
1 '''
2 This library is provided to allow standard python logging
3 to output log data as JSON formatted strings
4 '''
5 import logging
6 import json
7 import re
8 from datetime import date, datetime, time, tzinfo, timedelta
9 import traceback
10 import importlib
11
12 from inspect import istraceback
13
14 from collections import OrderedDict
15
16
def _inject_req_id(record, *args, **kwargs):
    """
    Placeholder hook: hands ``record`` back untouched; any extra positional
    or keyword arguments are accepted and ignored.
    """
    unchanged_record = record
    return unchanged_record
19
20
21 ExceptionAwareFormatter = logging.Formatter
22
23
# fixed offsets used by the tzinfo implementation below
ZERO = timedelta()
HOUR = timedelta(seconds=3600)


class UTC(tzinfo):
    """tzinfo implementation for UTC: zero offset and no daylight saving."""

    def utcoffset(self, dt):
        # UTC has no offset from itself
        return ZERO

    def tzname(self, dt):
        return "UTC"

    def dst(self, dt):
        # no daylight saving adjustment
        return ZERO


# shared module-level instance
utc = UTC()
41
42
43 # skip natural LogRecord attributes
44 # http://docs.python.org/library/logging.html#logrecord-attributes
45 RESERVED_ATTRS = (
46 'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename',
47 'funcName', 'levelname', 'levelno', 'lineno', 'module',
48 'msecs', 'message', 'msg', 'name', 'pathname', 'process',
49 'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName')
50
51
def merge_record_extra(record, target, reserved):
    """
    Copy the non-reserved, non-private attributes of a log record into ``target``.

    :param record: logging.LogRecord whose ``__dict__`` is scanned
    :param target: dict that receives the attributes (updated in place)
    :param reserved: container of attribute names that must be skipped
    :return: the same ``target`` dict
    """
    for attr_name, attr_value in record.__dict__.items():
        if attr_name in reserved:
            continue
        # keys without ``startswith`` (e.g. numeric keys) can never be "private"
        is_private = hasattr(attr_name, "startswith") and attr_name.startswith('_')
        if is_private:
            continue
        target[attr_name] = attr_value
    return target
67
68
class JsonEncoder(json.JSONEncoder):
    """
    JSONEncoder subclass that also serializes dates/times, tracebacks,
    exceptions and classes, and falls back to ``str()`` for anything else.
    """

    def default(self, obj):
        """
        Return a JSON-serializable stand-in for ``obj``.

        Falls back to ``str(obj)`` when the base encoder rejects the object,
        and to None when even ``str()`` fails.
        """
        if isinstance(obj, (date, datetime, time)):
            return self.format_datetime_obj(obj)

        if istraceback(obj):
            formatted_tb = traceback.format_tb(obj)
            return ''.join(formatted_tb).strip()

        # exception instances and plain classes are rendered via str()
        if isinstance(obj, Exception) or type(obj) is type:
            return str(obj)

        try:
            return super().default(obj)
        except TypeError:
            pass

        try:
            return str(obj)
        except Exception:
            return None

    def format_datetime_obj(self, obj):
        """Render a date/datetime/time object in ISO 8601 form."""
        return obj.isoformat()
98
99
class JsonFormatter(ExceptionAwareFormatter):
    """
    Logging formatter that renders each record as a JSON string.
    Values the JSON machinery cannot serialize natively are delegated to the
    configured ``json_default`` / ``json_encoder``.
    """

    def __init__(self, *args, **kwargs):
        """
        Formatter-specific keyword arguments are consumed here; everything
        else is forwarded to ``logging.Formatter``.

        :param json_default: a function for encoding non-standard objects
            as outlined in http://docs.python.org/2/library/json.html
        :param json_encoder: optional custom encoder
        :param json_serializer: a :meth:`json.dumps`-compatible callable
            that will be used to serialize the log record.
        :param json_indent: an optional :meth:`json.dumps`-compatible numeric value
            that will be used to customize the indent of the output json.
        :param json_ensure_ascii: ensure_ascii parameter for json.dumps
        :param prefix: an optional string prefix added at the beginning of
            the formatted string
        :param reserved_attrs: an optional list of fields that will be skipped when
            outputting json log record. Defaults to all log record attributes:
            http://docs.python.org/library/logging.html#logrecord-attributes
        :param timestamp: an optional string/boolean field to add a timestamp when
            outputting the json log record. If a string is passed, the timestamp is
            added under that key; if True, the key is "timestamp". Defaults to True.

        ``json_default``, ``json_encoder`` and ``json_serializer`` may also be given
        as dotted-path strings, which are imported on construction.
        """
        take = kwargs.pop
        self.json_default = self._str_to_fn(take("json_default", None))
        self.json_encoder = self._str_to_fn(take("json_encoder", None))
        self.json_serializer = self._str_to_fn(take("json_serializer", json.dumps))
        self.json_indent = take("json_indent", None)
        self.json_ensure_ascii = take("json_ensure_ascii", True)
        self.prefix = take("prefix", "")
        reserved_attrs = take("reserved_attrs", RESERVED_ATTRS)
        # name -> name mapping, used for membership checks
        self.reserved_attrs = dict(zip(reserved_attrs, reserved_attrs))
        self.timestamp = take("timestamp", True)

        logging.Formatter.__init__(self, *args, **kwargs)

        # without an explicit encoder or default hook, use the bundled encoder
        if not (self.json_encoder or self.json_default):
            self.json_encoder = JsonEncoder

        self._required_fields = self.parse()
        skip_fields = dict(zip(self._required_fields, self._required_fields))
        skip_fields.update(self.reserved_attrs)
        self._skip_fields = skip_fields

    def _str_to_fn(self, fn_as_str):
        """
        Resolve a dotted path such as ``package.module.function`` to the object
        it names; any non-string argument is returned unchanged.

        :param fn_as_str: dotted-path string, or an already resolved object.
        """
        if isinstance(fn_as_str, str):
            module_path, _, attr_name = fn_as_str.rpartition('.')
            return getattr(importlib.import_module(module_path), attr_name)
        return fn_as_str

    def parse(self):
        """
        Extract the field names referenced in the format string.

        :return: list of the names found between parentheses in ``self._fmt``;
            these fields are included in every log message.
        """
        return re.findall(r'\((.+?)\)', self._fmt, re.IGNORECASE)

    def add_fields(self, log_record, record, message_dict):
        """
        Populate ``log_record`` from the record; override to customize which
        fields are emitted.
        """
        record_attrs = record.__dict__
        for field_name in self._required_fields:
            log_record[field_name] = record_attrs.get(field_name)

        log_record.update(message_dict)
        merge_record_extra(record, log_record, reserved=self._skip_fields)

        if not self.timestamp:
            return

        # a string value names the key; any other truthy value means "timestamp"
        if type(self.timestamp) is str:
            ts_key = self.timestamp
        else:
            ts_key = 'timestamp'
        log_record[ts_key] = datetime.fromtimestamp(record.created, tz=utc)

    def process_log_record(self, log_record):
        """
        Hook for subclasses to post-process the (possibly ordered) dictionary;
        the default implementation returns it unchanged.
        """
        return log_record

    def jsonify_log_record(self, log_record):
        """Returns a json string of the log record."""
        serializer_options = dict(
            default=self.json_default,
            cls=self.json_encoder,
            indent=self.json_indent,
            ensure_ascii=self.json_ensure_ascii,
        )
        return self.json_serializer(log_record, **serializer_options)

    def serialize_log_record(self, log_record):
        """Returns the final representation of the log record: prefix + json."""
        return f"{self.prefix}{self.jsonify_log_record(log_record)}"

    def format(self, record):
        """Formats a log record and serializes to json"""
        # FIXME: logging.LogRecord.msg and logging.LogRecord.message in typeshed
        #        are always type of str. We shouldn't need to override that.
        if isinstance(record.msg, dict):
            # a dict message is merged into the output as-is (and may be
            # extended below with exc_info / stack_info)
            message_dict = record.msg
            record.message = None
        else:
            message_dict = {}
            record.message = record.getMessage()

        # only format time if needed
        if "asctime" in self._required_fields:
            record.asctime = self.formatTime(record, self.datefmt)

        # Display formatted exception, but allow overriding it in the
        # user-supplied dict.
        if record.exc_info and not message_dict.get('exc_info'):
            message_dict['exc_info'] = self.formatException(record.exc_info)
        if record.exc_text and not message_dict.get('exc_info'):
            message_dict['exc_info'] = record.exc_text

        # Display formatted record of stack frames
        # default format is a string returned from :func:`traceback.print_stack`
        try:
            if record.stack_info and not message_dict.get('stack_info'):
                message_dict['stack_info'] = self.formatStack(record.stack_info)
        except AttributeError:
            # Python2.7 doesn't have stack_info.
            pass

        try:
            log_record = OrderedDict()
        except NameError:
            log_record = {}

        _inject_req_id(record, with_prefix=False)
        self.add_fields(log_record, record, message_dict)
        return self.serialize_log_record(self.process_log_record(log_record))
@@ -0,0 +1,53 b''
1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
19 import sys
20 import logging
21
22
# ANSI foreground colour codes 30..37
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(30, 38)

# Sequences
RESET_SEQ = "\033[0m"
COLOR_SEQ = "\033[0;%dm"
BOLD_SEQ = "\033[1m"

# level name -> colour code used for that level
COLORS = dict(
    CRITICAL=MAGENTA,
    ERROR=RED,
    WARNING=CYAN,
    INFO=GREEN,
    DEBUG=BLUE,
    SQL=YELLOW,
)
38
39
class ColorFormatter(logging.Formatter):
    """Formatter that wraps the formatted record in an ANSI colour sequence."""

    def format(self, record):
        """
        Format the record and colour it according to its level name.

        The colour is looked up in ``COLORS`` by ``record.levelname``. Records
        whose level name has no entry there (custom or unnamed levels) are
        returned uncoloured instead of raising ``KeyError``.

        :param record: logging.LogRecord to format
        :return: formatted message, wrapped in colour/reset sequences when a
            colour is known for the level
        """
        def_record = super().format(record)

        color = COLORS.get(record.levelname)
        if color is None:
            # unknown level name: emit the message without colouring
            return def_record

        start = COLOR_SEQ % color
        end = RESET_SEQ

        colored_record = ''.join([start, def_record, end])
        return colored_record
@@ -0,0 +1,87 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 import logging
19 import os
20 import diskcache
21 from diskcache import RLock
22
23 log = logging.getLogger(__name__)
24
25 cache_meta = None
26
27
class ReentrantLock(RLock):
    """
    Context-manager variant of diskcache's ``RLock`` used around archive
    cache generation.

    NOTE(review): the acquire/release calls are commented out, so entering
    and leaving the context only logs; no lock is taken here.
    """

    def __enter__(self):
        key = self._key

        log.debug('Acquire ReentrantLock(key=%s) for archive cache generation...', key)
        # self.acquire()  -- intentionally disabled in the original code
        log.debug('Lock for key=%s acquired', key)

    def __exit__(self, *exc_info):
        # self.release()  -- intentionally disabled in the original code
        return None
39
40
def get_archival_config(config):
    """
    Build the archive-cache configuration from a settings mapping.

    Starts from the default eviction policy and overlays every entry of
    ``config`` whose key starts with ``archive_cache``.

    :param config: mapping of setting name -> value
    :return: new dict holding only the archive cache settings
    """
    archive_settings = {
        key: value
        for key, value in config.items()
        if key.startswith('archive_cache')
    }
    return {
        'archive_cache.eviction_policy': 'least-frequently-used',
        **archive_settings,
    }
52
53
def get_archival_cache_store(config):
    """
    Return the process-wide archive cache, creating it on first use.

    The instance is stored in the module-level ``cache_meta``; once it is set,
    later calls return it and ignore ``config``.

    :param config: settings mapping; must provide ``archive_cache.store_dir``,
        ``archive_cache.cache_size_gb`` and ``archive_cache.cache_shards``
    :return: the shared ``diskcache.FanoutCache`` instance
    """
    global cache_meta

    if cache_meta is None:
        settings = get_archival_config(config)

        store_dir = settings['archive_cache.store_dir']
        size_gb = settings['archive_cache.cache_size_gb']
        shards = settings['archive_cache.cache_shards']
        eviction_policy = settings['archive_cache.eviction_policy']

        log.debug('Initializing archival cache instance under %s', store_dir)

        # check if it's ok to write, and re-create the archive cache
        if not os.path.isdir(store_dir):
            os.makedirs(store_dir, exist_ok=True)

        # NOTE(review): the size computation assumes a numeric cache_size_gb - confirm with callers
        cache_meta = diskcache.FanoutCache(
            store_dir,
            shards=shards,
            cull_limit=0,  # manual eviction required
            size_limit=size_gb * 1024 * 1024 * 1024,
            eviction_policy=eviction_policy,
            timeout=30,
        )

    return cache_meta
82
83
def includeme(config):
    """
    Include hook that intentionally does nothing.

    For vcsserver the cache is not initialized at start: the cache config is
    sent via wire on the make-archive call, so this call just lazy-enables
    the client.
    """
    return None
@@ -0,0 +1,2 b''
1 # use orjson by default
2 import orjson as json
@@ -0,0 +1,70 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 from vcsserver.lib._vendor.statsd import client_from_config
19
20
class StatsdClientNotInitialised(Exception):
    """Error type signalling that the statsd client was used before being set up."""
23
24
class _Singleton(type):
    """Metaclass that hands out one shared instance per class."""

    # class -> its single instance, shared by every class using this metaclass
    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        if cls in registry:
            # already built: constructor arguments are ignored
            return registry[cls]

        instance = super().__call__(*args, **kwargs)
        registry[cls] = instance
        return instance


class Singleton(_Singleton("SingletonMeta", (object,), {})):
    """Base class to inherit from in order to get singleton behaviour."""
38
39
class StatsdClientClass(Singleton):
    """
    Singleton holder for the statsd client.

    Until ``setup()`` has run, every attribute whose name starts with
    ``statsd`` resolves to None (or raises ``StatsdClientNotInitialised``
    when ``strict_mode_init`` is true).
    """
    # the client, exposed under two names
    statsd = None
    statsd_client = None
    # flipped to True at the end of setup()
    setup_run = False
    # when True, early access to statsd* attributes raises instead of returning None
    strict_mode_init = False

    def __getattribute__(self, name):
        lookup = super().__getattribute__

        # non-statsd attributes, and everything after setup, use the normal lookup
        if not name.startswith("statsd") or self.setup_run:
            return lookup(name)

        if self.strict_mode_init:
            raise StatsdClientNotInitialised(f"requested key was {name}")
        return None

    def setup(self, settings):
        """
        Initialize the client

        :param settings: configuration mapping handed to ``client_from_config``;
            the ``statsd_strict_init`` key is removed from it first.
        """
        # NOTE(review): the popped value is not stored anywhere, so
        # ``strict_mode_init`` keeps its class default - confirm intent.
        strict_init_mode = settings.pop('statsd_strict_init', False)

        client = client_from_config(settings)
        self.statsd = self.statsd_client = client
        self.setup_run = True
69
70 StatsdClient = StatsdClientClass()
@@ -0,0 +1,160 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 import os
19 import tempfile
20
21 from svn import client
22 from svn import core
23 from svn import ra
24
25 from mercurial import error
26
27 from vcsserver.str_utils import safe_bytes
28
29 core.svn_config_ensure(None)
30 svn_config = core.svn_config_get_config(None)
31
32
class RaCallbacks(ra.Callbacks):
    """Callbacks object handed to the Subversion RA layer when opening a session."""

    @staticmethod
    def open_tmp_file(pool):  # pragma: no cover
        """Create an empty temporary file and return its path."""
        handle, tmp_path = tempfile.mkstemp()
        # only the path is needed, so release the descriptor right away
        os.close(handle)
        return tmp_path

    @staticmethod
    def get_client_string(pool):
        """Return the client identification string as bytes."""
        client_string = b'RhodeCode-subversion-url-checker'
        return client_string
43
44
class SubversionException(Exception):
    """Base class for the Subversion related errors defined in this module."""


class SubversionConnectionException(SubversionException):
    """Raised for a generic failure while connecting to a repository."""
51
52
def normalize_url(url):
    """
    Normalize a Subversion URL given as bytes.

    A leading ``svn+`` is dropped from ``svn+http://`` / ``svn+https://`` URLs
    and trailing slashes are removed. Empty values are returned unchanged.

    :param url: URL as bytes (or an empty/None value)
    :return: normalized URL
    """
    if not url:
        return url

    wrapped_schemes = (b'svn+http://', b'svn+https://')
    if url.startswith(wrapped_schemes):
        # strip the 4 byte ``svn+`` marker
        url = url[4:]

    return url.rstrip(b'/')
60
61
def _create_auth_baton(pool):
    """
    Create a Subversion authentication baton.

    :param pool: pool passed to ``core.svn_auth_open``
    :return: the auth baton built from all usable providers
    """
    # Provider factories that exist only on some platforms/builds; each one
    # is used when the bindings expose it.
    optional_provider_names = (
        'svn_auth_get_gnome_keyring_simple_provider',
        'svn_auth_get_gnome_keyring_ssl_client_cert_pw_provider',
        'svn_auth_get_keychain_simple_provider',
        'svn_auth_get_keychain_ssl_client_cert_pw_provider',
        'svn_auth_get_kwallet_simple_provider',
        'svn_auth_get_kwallet_ssl_client_cert_pw_provider',
        'svn_auth_get_ssl_client_cert_file_provider',
        'svn_auth_get_windows_simple_provider',
        'svn_auth_get_windows_ssl_server_trust_provider',
    )

    providers = []
    for provider_name in optional_provider_names:
        factory = getattr(core, provider_name, None)
        if factory is None:
            continue
        try:
            providers.append(factory())
        except RuntimeError:
            # provider present but unusable here; skip it
            continue

    # providers that are always added
    providers.extend([
        client.get_simple_provider(),
        client.get_username_provider(),
        client.get_ssl_client_cert_file_provider(),
        client.get_ssl_client_cert_pw_file_provider(),
        client.get_ssl_server_trust_file_provider(),
    ])

    return core.svn_auth_open(providers, pool)
96
97
class SubversionRepo:
    """Wrapper for a Subversion repository.

    It uses the SWIG Python bindings, see above for requirements.
    Constructing an instance opens an RA session to ``svn_url`` and reads
    the repository UUID.
    """
    def __init__(self, svn_url: bytes = b'', username: bytes = b'', password: bytes = b''):
        """
        :param svn_url: repository URL; canonicalized before use
        :param username: optional default username for authentication
        :param password: optional default password for authentication
        :raises SubversionConnectionException: when the RA session cannot be opened
        """

        self.username = username
        self.password = password
        self.svn_url = core.svn_path_canonicalize(svn_url)

        self.auth_baton_pool = core.Pool()
        self.auth_baton = _create_auth_baton(self.auth_baton_pool)
        # self.init_ra_and_client() assumes that a pool already exists
        self.pool = core.Pool()

        self.ra = self.init_ra_and_client()
        self.uuid = ra.get_uuid(self.ra, self.pool)

    def init_ra_and_client(self):
        """Initializes the RA and client layers, because sometimes getting
        unified diffs runs the remote server out of open files.

        :return: the RA session returned by ``ra.open2``
        :raises SubversionConnectionException: when opening the session fails;
            the message joins the messages of the whole error chain.
        """

        if self.username:
            core.svn_auth_set_parameter(self.auth_baton,
                                        core.SVN_AUTH_PARAM_DEFAULT_USERNAME,
                                        self.username)
        if self.password:
            core.svn_auth_set_parameter(self.auth_baton,
                                        core.SVN_AUTH_PARAM_DEFAULT_PASSWORD,
                                        self.password)

        callbacks = RaCallbacks()
        callbacks.auth_baton = self.auth_baton

        try:
            return ra.open2(self.svn_url, callbacks, svn_config, self.pool)
        # The bindings raise ``core.SubversionException``; the class of the same
        # name defined in this module is unrelated, so catching only that one
        # never translated real connection errors.
        except (core.SubversionException, SubversionException) as e:
            # e.child contains a detailed error messages
            msglist = []
            svn_exc = e
            while svn_exc:
                if svn_exc.args and svn_exc.args[0]:
                    msglist.append(svn_exc.args[0])
                # the module-local exception type has no ``child`` attribute
                svn_exc = getattr(svn_exc, 'child', None)
            msg = '\n'.join(msglist)
            raise SubversionConnectionException(msg) from e
146
147
class svnremoterepo:
    """Thin holder of credentials and URL for a remote Subversion repository."""

    def __init__(self, username: bytes = b'', password: bytes = b'', svn_url: bytes = b''):
        """
        :param username: username as bytes; falsy values become ``b''``
        :param password: password as bytes; falsy values become ``b''``
        :param svn_url: repository URL, stored normalized as ``self.path``
        """
        self.username = username if username else b''
        self.password = password if password else b''
        self.path = normalize_url(svn_url)

    def svn(self):
        """
        Open the repository.

        :return: a connected ``SubversionRepo``
        :raises error.Abort: when the connection fails
        """
        try:
            repo = SubversionRepo(self.path, self.username, self.password)
        except SubversionConnectionException as e:
            raise error.Abort(safe_bytes(e))
        return repo
@@ -0,0 +1,17 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
@@ -0,0 +1,144 b''
1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software Foundation,
16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18 import typing
19 import base64
20 import logging
21
22
23 log = logging.getLogger(__name__)
24
25
def safe_int(val, default=None) -> int:
    """
    Convert ``val`` to an int, falling back to ``default`` when the
    conversion is not possible.

    :param val: value to convert
    :param default: returned as-is when ``int(val)`` raises ValueError or TypeError
    """
    try:
        return int(val)
    except (ValueError, TypeError):
        return default
41
42
def base64_to_str(text) -> str:
    """
    Base64-encode ``text`` and return the result as a str with surrounding
    whitespace stripped.
    """
    encoded = base64.encodebytes(safe_bytes(text))
    return safe_str(encoded).strip()
45
46
def get_default_encodings() -> list[str]:
    """
    Return the list of encodings tried by ``safe_str``/``safe_bytes`` when the
    caller does not pass one explicitly. Currently only UTF-8.
    """
    return ['utf8']
49
50
def safe_str(str_, to_encoding=None) -> str:
    """
    Best-effort conversion of ``str_`` to ``str``.

    * ``str`` input is returned unchanged
    * non-bytes input is passed through ``str()``
    * ``bytes`` input is decoded with each candidate encoding in turn; when
      none succeeds, the first encoding is used with ``errors='replace'``

    :param str_: value to convert
    :param to_encoding: encoding name or list/tuple of names used for decoding,
        defaults to ``get_default_encodings()``
    """
    if isinstance(str_, str):
        return str_

    # anything that is not bytes gets its plain str() representation
    if not isinstance(str_, bytes):
        return str(str_)

    encodings = to_encoding or get_default_encodings()
    if not isinstance(encodings, (list, tuple)):
        encodings = [encodings]

    for codec in encodings:
        try:
            return str_.decode(codec)
        except UnicodeDecodeError:
            continue

    # nothing decoded cleanly, fall back to a lossy decode
    return str_.decode(encodings[0], 'replace')
76
77
def safe_bytes(str_, from_encoding=None) -> bytes:
    """
    Best-effort conversion of ``str_`` to ``bytes``.

    * ``bytes`` input is returned unchanged
    * ``str`` input is encoded with each candidate encoding in turn; when
      none succeeds, the first encoding is used with ``errors='replace'``

    :param str_: str (or bytes) to convert
    :param from_encoding: encoding name or list/tuple of names used for encoding,
        defaults to ``get_default_encodings()``
    :raises ValueError: when ``str_`` is neither ``str`` nor ``bytes``
    """
    if isinstance(str_, bytes):
        return str_

    if not isinstance(str_, str):
        raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

    from_encoding = from_encoding or get_default_encodings()
    if not isinstance(from_encoding, (list, tuple)):
        from_encoding = [from_encoding]

    for enc in from_encoding:
        try:
            return str_.encode(enc)
        except UnicodeEncodeError:
            # str.encode() raises UnicodeEncodeError (not UnicodeDecodeError);
            # try the next candidate encoding
            pass

    # nothing encoded cleanly, fall back to a lossy encode
    return str_.encode(from_encoding[0], 'replace')
102
103
def ascii_bytes(str_, allow_bytes=False) -> bytes:
    """
    Strict str -> bytes conversion for values known to be pure ASCII
    (hex, base64, json, urlencoded data, identifiers, ...).

    Use it where the "safe" guessing of encodings should be avoided.

    :param str_: ASCII-only str to convert
    :param allow_bytes: when true, bytes input is returned unchanged
    :raises ValueError: when ``str_`` is not a str (and not an allowed bytes)
    :raises UnicodeError: when ``str_`` contains non-ASCII characters
    """
    if isinstance(str_, str):
        return str_.encode('ascii')

    if allow_bytes and isinstance(str_, bytes):
        return str_

    raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
119
120
def ascii_str(str_) -> str:
    """
    Strict bytes -> str conversion for values known to be pure ASCII
    (hex, base64, urlencoded data, identifiers, ...).

    Use it where a str is wanted and the "safe" guessing of encodings
    should be avoided.

    :param str_: ASCII-only bytes to convert
    :raises ValueError: when ``str_`` is not bytes
    :raises UnicodeError: when ``str_`` contains non-ASCII bytes
    """
    if isinstance(str_, bytes):
        return str_.decode('ascii')

    raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
134
135
def convert_to_str(data):
    """
    Recursively convert bytes found in ``data`` to str.

    Bytes are converted with ``safe_str``; tuples and lists are rebuilt with
    their items converted; every other value is returned untouched.
    """
    if isinstance(data, bytes):
        return safe_str(data)

    if isinstance(data, tuple):
        return tuple(map(convert_to_str, data))

    if isinstance(data, list):
        return [convert_to_str(entry) for entry in data]

    return data
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
@@ -1,6 +1,5 b''
1 1 [bumpversion]
2 current_version = 4.27.1
2 current_version = 5.0.0
3 3 message = release: Bump version {current_version} to {new_version}
4 4
5 5 [bumpversion:file:vcsserver/VERSION]
6
@@ -1,4 +1,5 b''
1 1 syntax: glob
2
2 3 *.orig
3 4 *.pyc
4 5 *.swp
@@ -19,8 +20,11 b' syntax: regexp'
19 20 ^\.pydevproject$
20 21 ^\.coverage$
21 22 ^\.cache.*$
23 ^\.venv.*$
24 ^\.ruff_cache.*$
22 25 ^\.rhodecode$
23 26
27
24 28 ^.dev
25 29 ^build/
26 30 ^coverage\.xml$
@@ -5,6 +5,9 b' include *.txt'
5 5 # package extras
6 6 include vcsserver/VERSION
7 7
8 # all python files inside vcsserver
9 graft vcsserver
10
8 11 # all config files
9 12 recursive-include configs *
10 13
@@ -14,3 +17,7 b' recursive-include vcsserver/hook_utils/h'
14 17 # skip any tests files
15 18 recursive-exclude vcsserver/tests *
16 19
20 recursive-exclude docs/_build *
21 recursive-exclude * __pycache__
22 recursive-exclude * *.py[co]
23 recursive-exclude * .*.sw[a-z]
@@ -1,45 +1,139 b''
1 .DEFAULT_GOAL := help
1 # required for pushd to work..
2 SHELL = /bin/bash
3
2 4
3 5 # set by: PATH_TO_OUTDATED_PACKAGES=/some/path/outdated_packages.py
4 6 OUTDATED_PACKAGES = ${PATH_TO_OUTDATED_PACKAGES}
5 7
6 8 .PHONY: clean
7 clean: ## full clean
9 ## Cleanup compiled and cache py files
10 clean:
8 11 make test-clean
9 12 find . -type f \( -iname '*.c' -o -iname '*.pyc' -o -iname '*.so' -o -iname '*.orig' \) -exec rm '{}' ';'
13 find . -type d -name "build" -prune -exec rm -rf '{}' ';'
10 14
11 15
12 16 .PHONY: test
13 test: ## run test-clean and tests
17 ## run test-clean and tests
18 test:
14 19 make test-clean
15 20 make test-only
16 21
17 22
18 .PHONY:test-clean
19 test-clean: ## run test-clean and tests
23 .PHONY: test-clean
24 ## Remove test, coverage and lint artifacts
25 test-clean:
20 26 rm -rf coverage.xml htmlcov junit.xml pylint.log result
21 27 find . -type d -name "__pycache__" -prune -exec rm -rf '{}' ';'
22 28 find . -type f \( -iname '.coverage.*' \) -exec rm '{}' ';'
23 29
24 30
25 31 .PHONY: test-only
26 test-only: ## run tests
32 ## Run tests only without cleanup
33 test-only:
27 34 PYTHONHASHSEED=random \
28 35 py.test -x -vv -r xw -p no:sugar \
29 --cov=vcsserver --cov-report=term-missing --cov-report=html \
30 vcsserver
36 --cov-report=term-missing --cov-report=html \
37 --cov=vcsserver vcsserver
31 38
32 39
33 .PHONY: generate-pkgs
34 generate-pkgs: ## generate new python packages
35 nix-shell pkgs/shell-generate.nix --command "pip2nix generate --licenses"
40 .PHONY: ruff-check
41 ## run a ruff analysis
42 ruff-check:
43 ruff check --ignore F401 --ignore I001 --ignore E402 --ignore E501 --ignore F841 --exclude rhodecode/lib/dbmigrate --exclude .eggs --exclude .dev .
36 44
37 45
38 46 .PHONY: pip-packages
39 pip-packages: ## show outdated packages
47 ## Show outdated packages
48 pip-packages:
40 49 python ${OUTDATED_PACKAGES}
41 50
42 51
43 .PHONY: help
44 help:
45 @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-24s\033[0m %s\n", $$1, $$2}'
52 .PHONY: build
53 ## Build sdist/egg
54 build:
55 python -m build
56
57
.PHONY: dev-sh
## install zsh with carapace completions and start an interactive zsh dev shell
dev-sh:
	# only the write to /etc needs root, so sudo goes on tee, not on echo
	echo "deb [trusted=yes] https://apt.fury.io/rsteube/ /" | sudo tee -a "/etc/apt/sources.list.d/fury.list"
	sudo apt-get update
	sudo apt-get install -y zsh carapace-bin
	rm -rf /home/rhodecode/.oh-my-zsh
	curl https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh | sh
	# zsh reads ~/.zshrc (not ~/.zsrc); append so the oh-my-zsh config written above is kept
	echo "source <(carapace _carapace)" >> /home/rhodecode/.zshrc
	PROMPT='%(?.%F{green}√.%F{red}?%?)%f %B%F{240}%1~%f%b %# ' zsh
68
69
70 .PHONY: dev-env
71 ## make dev-env based on the requirements files and install develop of packages
72 ## Cleanup: pip freeze | grep -v "^-e" | grep -v "@" | xargs pip uninstall -y
73 dev-env:
74 pip install build virtualenv
75 pip wheel --wheel-dir=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
76 pip install --no-index --find-links=/home/rhodecode/.cache/pip/wheels -r requirements.txt -r requirements_test.txt -r requirements_debug.txt
77 pip install -e .
78
79
80 .PHONY: sh
81 ## shortcut for make dev-sh dev-env
82 sh:
83 make dev-env
84 make dev-sh
85
86
87 .PHONY: dev-srv
88 ## run develop server instance, docker exec -it $(docker ps -q --filter 'name=dev-enterprise-ce') /bin/bash
89 dev-srv:
90 pserve --reload .dev/dev.ini
91
92
93 .PHONY: dev-srv-g
94 ## run gunicorn multi process workers
95 dev-srv-g:
96 gunicorn --workers=4 --paste .dev/dev.ini --bind=0.0.0.0:10010 --config=.dev/gunicorn_config.py
97
98
99 # Default command on calling make
100 .DEFAULT_GOAL := show-help
101
102 .PHONY: show-help
103 show-help:
104 @echo "$$(tput bold)Available rules:$$(tput sgr0)"
105 @echo
106 @sed -n -e "/^## / { \
107 h; \
108 s/.*//; \
109 :doc" \
110 -e "H; \
111 n; \
112 s/^## //; \
113 t doc" \
114 -e "s/:.*//; \
115 G; \
116 s/\\n## /---/; \
117 s/\\n/ /g; \
118 p; \
119 }" ${MAKEFILE_LIST} \
120 | LC_ALL='C' sort --ignore-case \
121 | awk -F '---' \
122 -v ncol=$$(tput cols) \
123 -v indent=19 \
124 -v col_on="$$(tput setaf 6)" \
125 -v col_off="$$(tput sgr0)" \
126 '{ \
127 printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
128 n = split($$2, words, " "); \
129 line_length = ncol - indent; \
130 for (i = 1; i <= n; i++) { \
131 line_length -= length(words[i]) + 1; \
132 if (line_length <= 0) { \
133 line_length = ncol - indent - length(words[i]) - 1; \
134 printf "\n%*s ", -indent, " "; \
135 } \
136 printf "%s ", words[i]; \
137 } \
138 printf "\n"; \
139 }'
@@ -1,4 +1,4 b''
1 ## -*- coding: utf-8 -*-
1 #
2 2
3 3 ; #################################
4 4 ; RHODECODE VCSSERVER CONFIGURATION
@@ -7,7 +7,7 b''
7 7 [server:main]
8 8 ; COMMON HOST/IP CONFIG
9 9 host = 0.0.0.0
10 port = 9900
10 port = 10010
11 11
12 12 ; ##################################################
13 13 ; WAITRESS WSGI SERVER - Recommended for Development
@@ -31,85 +31,25 b' asyncore_use_poll = true'
31 31 ; GUNICORN APPLICATION SERVER
32 32 ; ###########################
33 33
34 ; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini
34 ; run with gunicorn --paste rhodecode.ini
35 35
36 36 ; Module to use, this setting shouldn't be changed
37 37 #use = egg:gunicorn#main
38 38
39 ; Sets the number of process workers. More workers means more concurrent connections
40 ; RhodeCode can handle at the same time. Each additional worker also it increases
41 ; memory usage as each has it's own set of caches.
42 ; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
43 ; than 8-10 unless for really big deployments .e.g 700-1000 users.
44 ; `instance_id = *` must be set in the [app:main] section below (which is the default)
45 ; when using more than 1 worker.
46 #workers = 2
47
48 ; Gunicorn access log level
49 #loglevel = info
50
51 ; Process name visible in process list
52 #proc_name = rhodecode_vcsserver
53
54 ; Type of worker class, one of `sync`, `gevent`
55 ; currently `sync` is the only option allowed.
56 #worker_class = sync
57
58 ; The maximum number of simultaneous clients. Valid only for gevent
59 #worker_connections = 10
60
61 ; Max number of requests that worker will handle before being gracefully restarted.
62 ; Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
63 #max_requests = 1000
64 #max_requests_jitter = 30
65
66 ; Amount of time a worker can spend with handling a request before it
67 ; gets killed and restarted. By default set to 21600 (6hrs)
68 ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
69 #timeout = 21600
70
71 ; The maximum size of HTTP request line in bytes.
72 ; 0 for unlimited
73 #limit_request_line = 0
74
75 ; Limit the number of HTTP headers fields in a request.
76 ; By default this value is 100 and can't be larger than 32768.
77 #limit_request_fields = 32768
78
79 ; Limit the allowed size of an HTTP request header field.
80 ; Value is a positive number or 0.
81 ; Setting it to 0 will allow unlimited header field sizes.
82 #limit_request_field_size = 0
83
84 ; Timeout for graceful workers restart.
85 ; After receiving a restart signal, workers have this much time to finish
86 ; serving requests. Workers still alive after the timeout (starting from the
87 ; receipt of the restart signal) are force killed.
88 ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
89 #graceful_timeout = 3600
90
91 # The number of seconds to wait for requests on a Keep-Alive connection.
92 # Generally set in the 1-5 seconds range.
93 #keepalive = 2
94
95 ; Maximum memory usage that each worker can use before it will receive a
96 ; graceful restart signal 0 = memory monitoring is disabled
97 ; Examples: 268435456 (256MB), 536870912 (512MB)
98 ; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
99 #memory_max_usage = 0
100
101 ; How often in seconds to check for memory usage for each gunicorn worker
102 #memory_usage_check_interval = 60
103
104 ; Threshold value for which we don't recycle worker if GarbageCollection
105 ; frees up enough resources. Before each restart we try to run GC on worker
106 ; in case we get enough free memory after that, restart will not happen.
107 #memory_usage_recovery_threshold = 0.8
108
109
110 39 [app:main]
111 40 ; The %(here)s variable will be replaced with the absolute path of parent directory
112 41 ; of this file
42 ; Each option in the app:main section can be overridden by an environment variable
43 ;
44 ;To override an option:
45 ;
46 ;RC_<KeyName>
47 ;Everything should be uppercase, . and - should be replaced by _.
48 ;For example, if you have these configuration settings:
49 ;rc_cache.repo_object.backend = foo
50 ;can be overridden by
51 ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
52
113 53 use = egg:rhodecode-vcsserver
114 54
115 55
@@ -133,13 +73,13 b' debugtoolbar.exclude_prefixes ='
133 73 ; #################
134 74
135 75 ; Pyramid default locales, we need this to be set
136 pyramid.default_locale_name = en
76 #pyramid.default_locale_name = en
137 77
138 78 ; default locale used by VCS systems
139 locale = en_US.UTF-8
79 #locale = en_US.UTF-8
140 80
141 81 ; path to binaries for vcsserver, it should be set by the installer
142 ; at installation time, e.g /home/user/vcsserver-1/profile/bin
82 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
143 83 ; it can also be a path to nix-build output in case of development
144 84 core.binary_dir = ""
145 85
@@ -153,21 +93,21 b' core.binary_dir = ""'
153 93
154 94 ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
155 95 ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
156 cache_dir = %(here)s/data
96 #cache_dir = %(here)s/data
157 97
158 98 ; ***************************************
159 99 ; `repo_object` cache, default file based
160 100 ; ***************************************
161 101
162 102 ; `repo_object` cache settings for vcs methods for repositories
163 rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
103 #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
164 104
165 105 ; cache auto-expires after N seconds
166 106 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
167 rc_cache.repo_object.expiration_time = 2592000
107 #rc_cache.repo_object.expiration_time = 2592000
168 108
169 109 ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
170 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache.db
110 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
171 111
172 112 ; ***********************************************************
173 113 ; `repo_object` cache with redis backend
@@ -194,16 +134,29 b' rc_cache.repo_object.expiration_time = 2'
194 134 ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
195 135 #rc_cache.repo_object.arguments.lock_auto_renewal = true
196 136
197 ; Statsd client config
137 ; Statsd client config, this is used to send metrics to statsd
138 ; We recommend setting up statsd_exporter and scraping the metrics with Prometheus
198 139 #statsd.enabled = false
199 140 #statsd.statsd_host = 0.0.0.0
200 141 #statsd.statsd_port = 8125
201 142 #statsd.statsd_prefix =
202 143 #statsd.statsd_ipv6 = false
203 144
145 ; Configure logging automatically at server startup. Set this to false
146 ; to use the custom logging config below.
147 ; RC_LOGGING_FORMATTER
148 ; RC_LOGGING_LEVEL
149 ; env variables can control the settings for logging in case of autoconfigure
150
151 #logging.autoconfigure = true
152
153 ; specify your own custom logging config file to configure logging
154 #logging.logging_conf_file = /path/to/custom_logging.ini
155
204 156 ; #####################
205 157 ; LOGGING CONFIGURATION
206 158 ; #####################
159
207 160 [loggers]
208 161 keys = root, vcsserver
209 162
@@ -211,7 +164,7 b' keys = root, vcsserver'
211 164 keys = console
212 165
213 166 [formatters]
214 keys = generic
167 keys = generic, json
215 168
216 169 ; #######
217 170 ; LOGGERS
@@ -226,7 +179,6 b' handlers ='
226 179 qualname = vcsserver
227 180 propagate = 1
228 181
229
230 182 ; ########
231 183 ; HANDLERS
232 184 ; ########
@@ -235,6 +187,8 b' propagate = 1'
235 187 class = StreamHandler
236 188 args = (sys.stderr, )
237 189 level = DEBUG
190 ; To enable JSON formatted logs replace 'generic' with 'json'
191 ; This allows sending properly formatted logs to grafana loki or elasticsearch
238 192 formatter = generic
239 193
240 194 ; ##########
@@ -244,3 +198,7 b' formatter = generic'
244 198 [formatter_generic]
245 199 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
246 200 datefmt = %Y-%m-%d %H:%M:%S
201
202 [formatter_json]
203 format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
204 class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
@@ -11,6 +11,8 b' import time'
11 11 import threading
12 12 import traceback
13 13 import random
14 import socket
15 import dataclasses
14 16 from gunicorn.glogging import Logger
15 17
16 18
@@ -18,8 +20,14 b' def get_workers():'
18 20 import multiprocessing
19 21 return multiprocessing.cpu_count() * 2 + 1
20 22
21 # GLOBAL
23
24 bind = "127.0.0.1:10010"
25
26
27 # Error logging output for gunicorn (-) is stdout
22 28 errorlog = '-'
29
30 # Access logging output for gunicorn (-) is stdout
23 31 accesslog = '-'
24 32
25 33
@@ -29,12 +37,112 b" accesslog = '-'"
29 37 worker_tmp_dir = None
30 38 tmp_upload_dir = None
31 39
40 # use re-use port logic
41 #reuse_port = True
42
32 43 # Custom log format
44 #access_log_format = (
45 # '%(t)s %(p)s INFO [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
46
47 # loki format for easier parsing in grafana
33 48 access_log_format = (
34 '%(t)s %(p)s INFO [GNCRN] %(h)-15s rqt:%(L)s %(s)s %(b)-6s "%(m)s:%(U)s %(q)s" usr:%(u)s "%(f)s" "%(a)s"')
49 'time="%(t)s" pid=%(p)s level="INFO" type="[GNCRN]" ip="%(h)-15s" rqt="%(L)s" response_code="%(s)s" response_bytes="%(b)-6s" uri="%(m)s:%(U)s %(q)s" user=":%(u)s" user_agent="%(a)s"')
50
51 # self adjust workers based on CPU count, to use maximum of CPU and not overquota the resources
52 # workers = get_workers()
53
54 # Gunicorn access log level
55 loglevel = 'info'
56
57 # Process name visible in a process list
58 proc_name = "rhodecode_vcsserver"
59
60 # Type of worker class, one of `sync`, `gevent` or `gthread`
61 # currently `sync` is the only option allowed for vcsserver and for rhodecode all of 3 are allowed
62 # gevent:
63 # In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
64 # e.g. workers =3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
65 # gthread:
66 # In this case, the maximum number of concurrent requests is (N workers * X threads)
67 # e.g. workers = 3 threads=3 = 3*3, 9 concurrent requests can be handled
68 worker_class = 'sync'
69
70 # Sets the number of process workers. More workers means more concurrent connections
71 # RhodeCode can handle at the same time. Each additional worker also increases
72 # memory usage as each has its own set of caches.
73 # The Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
74 # than 8-10 unless for huge deployments .e.g 700-1000 users.
75 # `instance_id = *` must be set in the [app:main] section below (which is the default)
76 # when using more than 1 worker.
77 workers = 2
78
79 # Threads numbers for worker class gthread
80 threads = 1
81
82 # The maximum number of simultaneous clients. Valid only for gevent
83 # In this case, the maximum number of concurrent requests is (N workers * X worker_connections)
84 # e.g workers =3 worker_connections=10 = 3*10, 30 concurrent requests can be handled
85 worker_connections = 10
86
87 # Max number of requests that worker will handle before being gracefully restarted.
88 # Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
89 max_requests = 2000
90 max_requests_jitter = int(max_requests * 0.2) # 20% of max_requests
91
92 # The maximum number of pending connections.
93 # Exceeding this number results in the client getting an error when attempting to connect.
94 backlog = 64
35 95
36 # self adjust workers based on CPU count
37 # workers = get_workers()
96 # The Amount of time a worker can spend with handling a request before it
97 # gets killed and restarted. By default, set to 21600 (6hrs)
98 # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
99 timeout = 21600
100
101 # The maximum size of HTTP request line in bytes.
102 # 0 for unlimited
103 limit_request_line = 0
104
105 # Limit the number of HTTP headers fields in a request.
106 # By default this value is 100 and can't be larger than 32768.
107 limit_request_fields = 32768
108
109 # Limit the allowed size of an HTTP request header field.
110 # Value is a positive number or 0.
111 # Setting it to 0 will allow unlimited header field sizes.
112 limit_request_field_size = 0
113
114 # Timeout for graceful workers restart.
115 # After receiving a restart signal, workers have this much time to finish
116 # serving requests. Workers still alive after the timeout (starting from the
117 # receipt of the restart signal) are force killed.
118 # Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
119 graceful_timeout = 21600
120
121 # The number of seconds to wait for requests on a Keep-Alive connection.
122 # Generally set in the 1-5 seconds range.
123 keepalive = 2
124
125 # Maximum memory usage that each worker can use before it will receive a
126 # graceful restart signal 0 = memory monitoring is disabled
127 # Examples: 268435456 (256MB), 536870912 (512MB)
128 # 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
129 # Dynamic formula 1024 * 1024 * 256 == 256MBs
130 memory_max_usage = 0
131
132 # How often in seconds to check for memory usage for each gunicorn worker
133 memory_usage_check_interval = 60
134
135 # Threshold value for which we don't recycle worker if GarbageCollection
136 # frees up enough resources. Before each restart, we try to run GC on worker
137 # in case we get enough free memory after that; restart will not happen.
138 memory_usage_recovery_threshold = 0.8
139
140
@dataclasses.dataclass
class MemoryCheckConfig:
    """Resolved worker memory-monitoring settings, as built by get_memory_usage_params()."""
    # memory limit per worker in bytes, 0 disables memory monitoring
    max_usage: int
    # how often (in seconds) the memory usage of a worker is checked
    check_interval: int
    # multiplier applied to the memory limit; used as the post-GC recovery threshold
    recovery_threshold: float
38 146
39 147
40 148 def _get_process_rss(pid=None):
@@ -50,12 +158,9 b' def _get_process_rss(pid=None):'
50 158
51 159
52 160 def _get_config(ini_path):
161 import configparser
53 162
54 163 try:
55 import configparser
56 except ImportError:
57 import ConfigParser as configparser
58 try:
59 164 config = configparser.RawConfigParser()
60 165 config.read(ini_path)
61 166 return config
@@ -63,8 +168,40 b' def _get_config(ini_path):'
63 168 return None
64 169
65 170
66 def _time_with_offset(memory_usage_check_interval):
67 return time.time() - random.randint(0, memory_usage_check_interval/2.0)
def get_memory_usage_params(config=None):
    """
    Resolve the worker memory-monitoring settings.

    Values are taken, in increasing priority, from:

    1. the module-level defaults of this gunicorn config
    2. the ``[server:main]`` section of the ini file given as ``config``
    3. the ``RC_GUNICORN_MEMORY_*`` environment variables (when non-empty)

    :param config: optional path to an ini file
    :return: MemoryCheckConfig with the resolved values
    """
    # module-level defaults
    max_usage = memory_max_usage
    check_interval = memory_usage_check_interval
    recovery_threshold = memory_usage_recovery_threshold

    if config:
        section = 'server:main'
        parsed = _get_config(os.path.abspath(config))

        if parsed and parsed.has_section(section):
            if parsed.has_option(section, 'memory_max_usage'):
                max_usage = parsed.getint(section, 'memory_max_usage')

            if parsed.has_option(section, 'memory_usage_check_interval'):
                check_interval = parsed.getint(section, 'memory_usage_check_interval')

            if parsed.has_option(section, 'memory_usage_recovery_threshold'):
                recovery_threshold = parsed.getfloat(section, 'memory_usage_recovery_threshold')

    # environment variables win over everything else; empty values are ignored
    env = os.environ
    max_usage = int(env.get('RC_GUNICORN_MEMORY_MAX_USAGE', '') or max_usage)
    check_interval = int(env.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '') or check_interval)
    recovery_threshold = float(env.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '') or recovery_threshold)

    return MemoryCheckConfig(max_usage, check_interval, recovery_threshold)
201
202
203 def _time_with_offset(check_interval):
204 return time.time() - random.randint(0, check_interval/2.0)
68 205
69 206
70 207 def pre_fork(server, worker):
@@ -73,39 +210,27 b' def pre_fork(server, worker):'
73 210
74 211 def post_fork(server, worker):
75 212
76 # memory spec defaults
77 _memory_max_usage = 0
78 _memory_usage_check_interval = 60
79 _memory_usage_recovery_threshold = 0.8
80
81 ini_path = os.path.abspath(server.cfg.paste)
82 conf = _get_config(ini_path)
83
84 section = 'server:main'
85 if conf and conf.has_section(section):
213 memory_conf = get_memory_usage_params()
214 _memory_max_usage = memory_conf.max_usage
215 _memory_usage_check_interval = memory_conf.check_interval
216 _memory_usage_recovery_threshold = memory_conf.recovery_threshold
86 217
87 if conf.has_option(section, 'memory_max_usage'):
88 _memory_max_usage = conf.getint(section, 'memory_max_usage')
89
90 if conf.has_option(section, 'memory_usage_check_interval'):
91 _memory_usage_check_interval = conf.getint(section, 'memory_usage_check_interval')
92
93 if conf.has_option(section, 'memory_usage_recovery_threshold'):
94 _memory_usage_recovery_threshold = conf.getfloat(section, 'memory_usage_recovery_threshold')
95
96 worker._memory_max_usage = _memory_max_usage
97 worker._memory_usage_check_interval = _memory_usage_check_interval
98 worker._memory_usage_recovery_threshold = _memory_usage_recovery_threshold
218 worker._memory_max_usage = int(os.environ.get('RC_GUNICORN_MEMORY_MAX_USAGE', '')
219 or _memory_max_usage)
220 worker._memory_usage_check_interval = int(os.environ.get('RC_GUNICORN_MEMORY_USAGE_CHECK_INTERVAL', '')
221 or _memory_usage_check_interval)
222 worker._memory_usage_recovery_threshold = float(os.environ.get('RC_GUNICORN_MEMORY_USAGE_RECOVERY_THRESHOLD', '')
223 or _memory_usage_recovery_threshold)
99 224
100 225 # register memory last check time, with some random offset so we don't recycle all
101 226 # at once
102 227 worker._last_memory_check_time = _time_with_offset(_memory_usage_check_interval)
103 228
104 229 if _memory_max_usage:
105 server.log.info("[%-10s] WORKER spawned with max memory set at %s", worker.pid,
230 server.log.info("pid=[%-10s] WORKER spawned with max memory set at %s", worker.pid,
106 231 _format_data_size(_memory_max_usage))
107 232 else:
108 server.log.info("[%-10s] WORKER spawned", worker.pid)
233 server.log.info("pid=[%-10s] WORKER spawned", worker.pid)
109 234
110 235
111 236 def pre_exec(server):
@@ -115,6 +240,9 b' def pre_exec(server):'
115 240 def on_starting(server):
116 241 server_lbl = '{} {}'.format(server.proc_name, server.address)
117 242 server.log.info("Server %s is starting.", server_lbl)
243 server.log.info('Config:')
244 server.log.info(f"\n{server.cfg}")
245 server.log.info(get_memory_usage_params())
118 246
119 247
120 248 def when_ready(server):
@@ -174,42 +302,45 b' def _format_data_size(size, unit="B", pr'
174 302
175 303
176 304 def _check_memory_usage(worker):
177 memory_max_usage = worker._memory_max_usage
178 if not memory_max_usage:
305 _memory_max_usage = worker._memory_max_usage
306 if not _memory_max_usage:
179 307 return
180 308
181 memory_usage_check_interval = worker._memory_usage_check_interval
182 memory_usage_recovery_threshold = memory_max_usage * worker._memory_usage_recovery_threshold
309 _memory_usage_check_interval = worker._memory_usage_check_interval
310 _memory_usage_recovery_threshold = memory_max_usage * worker._memory_usage_recovery_threshold
183 311
184 312 elapsed = time.time() - worker._last_memory_check_time
185 if elapsed > memory_usage_check_interval:
313 if elapsed > _memory_usage_check_interval:
186 314 mem_usage = _get_process_rss()
187 if mem_usage and mem_usage > memory_max_usage:
315 if mem_usage and mem_usage > _memory_max_usage:
188 316 worker.log.info(
189 317 "memory usage %s > %s, forcing gc",
190 _format_data_size(mem_usage), _format_data_size(memory_max_usage))
318 _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
191 319 # Try to clean it up by forcing a full collection.
192 320 gc.collect()
193 321 mem_usage = _get_process_rss()
194 if mem_usage > memory_usage_recovery_threshold:
322 if mem_usage > _memory_usage_recovery_threshold:
195 323 # Didn't clean up enough, we'll have to terminate.
196 324 worker.log.warning(
197 325 "memory usage %s > %s after gc, quitting",
198 _format_data_size(mem_usage), _format_data_size(memory_max_usage))
326 _format_data_size(mem_usage), _format_data_size(_memory_max_usage))
199 327 # This will cause worker to auto-restart itself
200 328 worker.alive = False
201 329 worker._last_memory_check_time = time.time()
202 330
203 331
204 332 def worker_int(worker):
205 worker.log.info("[%-10s] worker received INT or QUIT signal", worker.pid)
333 worker.log.info("pid=[%-10s] worker received INT or QUIT signal", worker.pid)
206 334
207 # get traceback info, on worker crash
208 id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
335 # get traceback info, when a worker crashes
336 def get_thread_id(t_id):
337 id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
338 return id2name.get(t_id, "unknown_thread_id")
339
209 340 code = []
210 for thread_id, stack in sys._current_frames().items():
341 for thread_id, stack in sys._current_frames().items(): # noqa
211 342 code.append(
212 "\n# Thread: %s(%d)" % (id2name.get(thread_id, ""), thread_id))
343 "\n# Thread: %s(%d)" % (get_thread_id(thread_id), thread_id))
213 344 for fname, lineno, name, line in traceback.extract_stack(stack):
214 345 code.append('File: "%s", line %d, in %s' % (fname, lineno, name))
215 346 if line:
@@ -218,15 +349,15 b' def worker_int(worker):'
218 349
219 350
220 351 def worker_abort(worker):
221 worker.log.info("[%-10s] worker received SIGABRT signal", worker.pid)
352 worker.log.info("pid=[%-10s] worker received SIGABRT signal", worker.pid)
222 353
223 354
224 355 def worker_exit(server, worker):
225 worker.log.info("[%-10s] worker exit", worker.pid)
356 worker.log.info("pid=[%-10s] worker exit", worker.pid)
226 357
227 358
228 359 def child_exit(server, worker):
229 worker.log.info("[%-10s] worker child exit", worker.pid)
360 worker.log.info("pid=[%-10s] worker child exit", worker.pid)
230 361
231 362
232 363 def pre_request(worker, req):
@@ -245,6 +376,76 b' def post_request(worker, req, environ, r'
245 376 _check_memory_usage(worker)
246 377
247 378
379 def _filter_proxy(ip):
380 """
381 Passed in IP addresses in HEADERS can be in a special format of multiple
382 ips. Those comma separated IPs are passed from various proxies in the
383 chain of request processing. The left-most being the original client.
384 We only care about the first IP which came from the org. client.
385
386 :param ip: ip string from headers
387 """
388 if ',' in ip:
389 _ips = ip.split(',')
390 _first_ip = _ips[0].strip()
391 return _first_ip
392 return ip
393
394
395 def _filter_port(ip):
396 """
397 Removes a port from ip, there are 4 main cases to handle here.
398 - ipv4 eg. 127.0.0.1
399 - ipv6 eg. ::1
400 - ipv4+port eg. 127.0.0.1:8080
401 - ipv6+port eg. [::1]:8080
402
403 :param ip:
404 """
405 def is_ipv6(ip_addr):
406 if hasattr(socket, 'inet_pton'):
407 try:
408 socket.inet_pton(socket.AF_INET6, ip_addr)
409 except socket.error:
410 return False
411 else:
412 return False
413 return True
414
415 if ':' not in ip: # must be ipv4 pure ip
416 return ip
417
418 if '[' in ip and ']' in ip: # ipv6 with port
419 return ip.split(']')[0][1:].lower()
420
421 # must be ipv6 or ipv4 with port
422 if is_ipv6(ip):
423 return ip
424 else:
425 ip, _port = ip.split(':')[:2] # means ipv4+port
426 return ip
427
428
429 def get_ip_addr(environ):
430 proxy_key = 'HTTP_X_REAL_IP'
431 proxy_key2 = 'HTTP_X_FORWARDED_FOR'
432 def_key = 'REMOTE_ADDR'
433
434 def _filters(x):
435 return _filter_port(_filter_proxy(x))
436
437 ip = environ.get(proxy_key)
438 if ip:
439 return _filters(ip)
440
441 ip = environ.get(proxy_key2)
442 if ip:
443 return _filters(ip)
444
445 ip = environ.get(def_key, '0.0.0.0')
446 return _filters(ip)
447
448
248 449 class RhodeCodeLogger(Logger):
249 450 """
250 451 Custom Logger that allows some customization that gunicorn doesn't allow
@@ -258,8 +459,62 b' class RhodeCodeLogger(Logger):'
258 459 def now(self):
259 460 """ return date in RhodeCode Log format """
260 461 now = time.time()
261 msecs = int((now - long(now)) * 1000)
462 msecs = int((now - int(now)) * 1000)
262 463 return time.strftime(self.datefmt, time.localtime(now)) + '.{0:03d}'.format(msecs)
263 464
465 def atoms(self, resp, req, environ, request_time):
466 """ Gets atoms for log formatting.
467 """
468 status = resp.status
469 if isinstance(status, str):
470 status = status.split(None, 1)[0]
471 atoms = {
472 'h': get_ip_addr(environ),
473 'l': '-',
474 'u': self._get_user(environ) or '-',
475 't': self.now(),
476 'r': "%s %s %s" % (environ['REQUEST_METHOD'],
477 environ['RAW_URI'],
478 environ["SERVER_PROTOCOL"]),
479 's': status,
480 'm': environ.get('REQUEST_METHOD'),
481 'U': environ.get('PATH_INFO'),
482 'q': environ.get('QUERY_STRING'),
483 'H': environ.get('SERVER_PROTOCOL'),
484 'b': getattr(resp, 'sent', None) is not None and str(resp.sent) or '-',
485 'B': getattr(resp, 'sent', None),
486 'f': environ.get('HTTP_REFERER', '-'),
487 'a': environ.get('HTTP_USER_AGENT', '-'),
488 'T': request_time.seconds,
489 'D': (request_time.seconds * 1000000) + request_time.microseconds,
490 'M': (request_time.seconds * 1000) + int(request_time.microseconds/1000),
491 'L': "%d.%06d" % (request_time.seconds, request_time.microseconds),
492 'p': "<%s>" % os.getpid()
493 }
494
495 # add request headers
496 if hasattr(req, 'headers'):
497 req_headers = req.headers
498 else:
499 req_headers = req
500
501 if hasattr(req_headers, "items"):
502 req_headers = req_headers.items()
503
504 atoms.update({"{%s}i" % k.lower(): v for k, v in req_headers})
505
506 resp_headers = resp.headers
507 if hasattr(resp_headers, "items"):
508 resp_headers = resp_headers.items()
509
510 # add response headers
511 atoms.update({"{%s}o" % k.lower(): v for k, v in resp_headers})
512
513 # add environ variables
514 environ_variables = environ.items()
515 atoms.update({"{%s}e" % k.lower(): v for k, v in environ_variables})
516
517 return atoms
518
264 519
265 520 logger_class = RhodeCodeLogger
@@ -1,4 +1,4 b''
1 ## -*- coding: utf-8 -*-
1 #
2 2
3 3 ; #################################
4 4 ; RHODECODE VCSSERVER CONFIGURATION
@@ -7,102 +7,42 b''
7 7 [server:main]
8 8 ; COMMON HOST/IP CONFIG
9 9 host = 127.0.0.1
10 port = 9900
10 port = 10010
11 11
12 12
13 13 ; ###########################
14 14 ; GUNICORN APPLICATION SERVER
15 15 ; ###########################
16 16
17 ; run with gunicorn --log-config rhodecode.ini --paste rhodecode.ini
17 ; run with gunicorn --paste rhodecode.ini
18 18
19 19 ; Module to use, this setting shouldn't be changed
20 20 use = egg:gunicorn#main
21 21
22 ; Sets the number of process workers. More workers means more concurrent connections
23 ; RhodeCode can handle at the same time. Each additional worker also it increases
24 ; memory usage as each has it's own set of caches.
25 ; Recommended value is (2 * NUMBER_OF_CPUS + 1), eg 2CPU = 5 workers, but no more
26 ; than 8-10 unless for really big deployments .e.g 700-1000 users.
27 ; `instance_id = *` must be set in the [app:main] section below (which is the default)
28 ; when using more than 1 worker.
29 workers = 2
30
31 ; Gunicorn access log level
32 loglevel = info
33
34 ; Process name visible in process list
35 proc_name = rhodecode_vcsserver
36
37 ; Type of worker class, one of `sync`, `gevent`
38 ; currently `sync` is the only option allowed.
39 worker_class = sync
40
41 ; The maximum number of simultaneous clients. Valid only for gevent
42 worker_connections = 10
43
44 ; Max number of requests that worker will handle before being gracefully restarted.
45 ; Prevents memory leaks, jitter adds variability so not all workers are restarted at once.
46 max_requests = 1000
47 max_requests_jitter = 30
48
49 ; Amount of time a worker can spend with handling a request before it
50 ; gets killed and restarted. By default set to 21600 (6hrs)
51 ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
52 timeout = 21600
53
54 ; The maximum size of HTTP request line in bytes.
55 ; 0 for unlimited
56 limit_request_line = 0
57
58 ; Limit the number of HTTP headers fields in a request.
59 ; By default this value is 100 and can't be larger than 32768.
60 limit_request_fields = 32768
61
62 ; Limit the allowed size of an HTTP request header field.
63 ; Value is a positive number or 0.
64 ; Setting it to 0 will allow unlimited header field sizes.
65 limit_request_field_size = 0
66
67 ; Timeout for graceful workers restart.
68 ; After receiving a restart signal, workers have this much time to finish
69 ; serving requests. Workers still alive after the timeout (starting from the
70 ; receipt of the restart signal) are force killed.
71 ; Examples: 1800 (30min), 3600 (1hr), 7200 (2hr), 43200 (12h)
72 graceful_timeout = 3600
73
74 # The number of seconds to wait for requests on a Keep-Alive connection.
75 # Generally set in the 1-5 seconds range.
76 keepalive = 2
77
78 ; Maximum memory usage that each worker can use before it will receive a
79 ; graceful restart signal 0 = memory monitoring is disabled
80 ; Examples: 268435456 (256MB), 536870912 (512MB)
81 ; 1073741824 (1GB), 2147483648 (2GB), 4294967296 (4GB)
82 memory_max_usage = 0
83
84 ; How often in seconds to check for memory usage for each gunicorn worker
85 memory_usage_check_interval = 60
86
87 ; Threshold value for which we don't recycle worker if GarbageCollection
88 ; frees up enough resources. Before each restart we try to run GC on worker
89 ; in case we get enough free memory after that, restart will not happen.
90 memory_usage_recovery_threshold = 0.8
91
92
93 22 [app:main]
94 23 ; The %(here)s variable will be replaced with the absolute path of parent directory
95 24 ; of this file
25 ; Each option in the app:main section can be overridden by an environment variable
26 ;
27 ;To override an option:
28 ;
29 ;RC_<KeyName>
30 ;Everything should be uppercase, . and - should be replaced by _.
31 ;For example, if you have these configuration settings:
32 ;rc_cache.repo_object.backend = foo
33 ;can be overridden by
34 ;export RC_CACHE_REPO_OBJECT_BACKEND=foo
35
96 36 use = egg:rhodecode-vcsserver
97 37
98 38 ; Pyramid default locales, we need this to be set
99 pyramid.default_locale_name = en
39 #pyramid.default_locale_name = en
100 40
101 41 ; default locale used by VCS systems
102 locale = en_US.UTF-8
42 #locale = en_US.UTF-8
103 43
104 44 ; path to binaries for vcsserver, it should be set by the installer
105 ; at installation time, e.g /home/user/vcsserver-1/profile/bin
45 ; at installation time, e.g /home/user/.rccontrol/vcsserver-1/profile/bin
106 46 ; it can also be a path to nix-build output in case of development
107 47 core.binary_dir = ""
108 48
@@ -116,21 +56,21 b' core.binary_dir = ""'
116 56
117 57 ; Default cache dir for caches. Putting this into a ramdisk can boost performance.
118 58 ; eg. /tmpfs/data_ramdisk, however this directory might require large amount of space
119 cache_dir = %(here)s/data
59 #cache_dir = %(here)s/data
120 60
121 61 ; ***************************************
122 62 ; `repo_object` cache, default file based
123 63 ; ***************************************
124 64
125 65 ; `repo_object` cache settings for vcs methods for repositories
126 rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
66 #rc_cache.repo_object.backend = dogpile.cache.rc.file_namespace
127 67
128 68 ; cache auto-expires after N seconds
129 69 ; Examples: 86400 (1Day), 604800 (7Days), 1209600 (14Days), 2592000 (30days), 7776000 (90Days)
130 rc_cache.repo_object.expiration_time = 2592000
70 #rc_cache.repo_object.expiration_time = 2592000
131 71
132 72 ; file cache store path. Defaults to `cache_dir =` value or tempdir if both values are not set
133 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache.db
73 #rc_cache.repo_object.arguments.filename = /tmp/vcsserver_cache_repo_object.db
134 74
135 75 ; ***********************************************************
136 76 ; `repo_object` cache with redis backend
@@ -157,16 +97,29 b' rc_cache.repo_object.expiration_time = 2'
157 97 ; auto-renew lock to prevent stale locks, slower but safer. Use only if problems happen
158 98 #rc_cache.repo_object.arguments.lock_auto_renewal = true
159 99
160 ; Statsd client config
100 ; Statsd client config, this is used to send metrics to statsd
101 ; We recommend setting up statsd_exporter and scraping the metrics using Prometheus
161 102 #statsd.enabled = false
162 103 #statsd.statsd_host = 0.0.0.0
163 104 #statsd.statsd_port = 8125
164 105 #statsd.statsd_prefix =
165 106 #statsd.statsd_ipv6 = false
166 107
108 ; Configure logging automatically at server startup. Set to false
109 ; to use the custom logging config below.
110 ; RC_LOGGING_FORMATTER
111 ; RC_LOGGING_LEVEL
112 ; env variables can control the settings for logging in case of autoconfigure
113
114 #logging.autoconfigure = true
115
116 ; specify your own custom logging config file to configure logging
117 #logging.logging_conf_file = /path/to/custom_logging.ini
118
167 119 ; #####################
168 120 ; LOGGING CONFIGURATION
169 121 ; #####################
122
170 123 [loggers]
171 124 keys = root, vcsserver
172 125
@@ -174,7 +127,7 b' keys = root, vcsserver'
174 127 keys = console
175 128
176 129 [formatters]
177 keys = generic
130 keys = generic, json
178 131
179 132 ; #######
180 133 ; LOGGERS
@@ -184,12 +137,11 b' level = NOTSET'
184 137 handlers = console
185 138
186 139 [logger_vcsserver]
187 level = DEBUG
140 level = INFO
188 141 handlers =
189 142 qualname = vcsserver
190 143 propagate = 1
191 144
192
193 145 ; ########
194 146 ; HANDLERS
195 147 ; ########
@@ -198,6 +150,8 b' propagate = 1'
198 150 class = StreamHandler
199 151 args = (sys.stderr, )
200 152 level = INFO
153 ; To enable JSON formatted logs replace 'generic' with 'json'
154 ; This allows sending properly formatted logs to grafana loki or elasticsearch
201 155 formatter = generic
202 156
203 157 ; ##########
@@ -207,3 +161,7 b' formatter = generic'
207 161 [formatter_generic]
208 162 format = %(asctime)s.%(msecs)03d [%(process)d] %(levelname)-5.5s [%(name)s] %(message)s
209 163 datefmt = %Y-%m-%d %H:%M:%S
164
165 [formatter_json]
166 format = %(timestamp)s %(levelname)s %(name)s %(message)s %(req_id)s
167 class = vcsserver.lib._vendor.jsonlogger.JsonFormatter
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -16,13 +16,12 b''
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import socket
19
20 19 import pytest
21 20
22 21
23 22 def pytest_addoption(parser):
24 23 parser.addoption(
25 '--repeat', type=int, default=100,
24 '--perf-repeat-vcs', type=int, default=100,
26 25 help="Number of repetitions in performance tests.")
27 26
28 27
@@ -34,13 +33,13 b' def repeat(request):'
34 33 Slower calls may divide it by 10 or 100. It is chosen in a way so that the
35 34 tests are not too slow in our default test suite.
36 35 """
37 return request.config.getoption('--repeat')
36 return request.config.getoption('--perf-repeat-vcs')
38 37
39 38
40 39 @pytest.fixture(scope='session')
41 40 def vcsserver_port(request):
42 41 port = get_available_port()
43 print('Using vcsserver port %s' % (port, ))
42 print(f'Using vcsserver port {port}')
44 43 return port
45 44
46 45
@@ -1,48 +1,77 b''
1 ## dependencies
2
3 # our custom configobj
4 https://code.rhodecode.com/upstream/configobj/artifacts/download/0-012de99a-b1e1-4f64-a5c0-07a98a41b324.tar.gz?md5=6a513f51fe04b2c18cf84c1395a7c626#egg=configobj==5.0.6
5
6 dogpile.cache==0.9.0
7 dogpile.core==0.4.1
8 decorator==4.1.2
9 dulwich==0.13.0
10 hgsubversion==1.9.3
11 hg-evolve==9.1.0
12 mako==1.1.0
13 markupsafe==1.1.1
14 mercurial==5.1.1
15 msgpack-python==0.5.6
16
17 pastedeploy==2.1.0
18 pyramid==1.10.4
19 pyramid-mako==1.1.0
20 pygit2==0.28.2
1 # deps, generated via pipdeptree --exclude setuptools,wheel,pipdeptree,pip -f | tr '[:upper:]' '[:lower:]'
21 2
3 async-timeout==4.0.3
4 atomicwrites==1.4.1
5 celery==5.3.6
6 billiard==4.2.0
7 click==8.1.3
8 click-didyoumean==0.3.0
9 click==8.1.3
10 click-plugins==1.1.1
11 click==8.1.3
12 click-repl==0.2.0
13 click==8.1.3
14 prompt-toolkit==3.0.38
15 wcwidth==0.2.6
16 six==1.16.0
17 kombu==5.3.5
18 amqp==5.2.0
19 vine==5.1.0
20 vine==5.1.0
21 python-dateutil==2.8.2
22 six==1.16.0
23 tzdata==2023.4
24 vine==5.1.0
25 contextlib2==21.6.0
26 cov-core==1.15.0
27 coverage==7.2.3
28 diskcache==5.6.3
29 dogpile.cache==1.3.0
30 decorator==5.1.1
31 stevedore==5.1.0
32 pbr==5.11.1
33 dulwich==0.21.6
34 urllib3==1.26.14
35 gunicorn==21.2.0
36 packaging==23.1
37 hg-evolve==11.0.2
38 importlib-metadata==6.0.0
39 zipp==3.15.0
40 mercurial==6.3.3
41 mock==5.0.2
42 more-itertools==9.1.0
43 msgpack==1.0.7
44 orjson==3.9.13
45 psutil==5.9.8
46 py==1.11.0
47 pygit2==1.13.3
48 cffi==1.16.0
49 pycparser==2.21
50 pygments==2.15.1
51 pyparsing==3.1.1
52 pyramid==2.0.2
53 hupper==1.12
54 plaster==1.1.2
55 plaster-pastedeploy==1.0.1
56 pastedeploy==3.1.0
57 plaster==1.1.2
58 translationstring==1.4
59 venusian==3.0.0
60 webob==1.8.7
61 zope.deprecation==5.0.0
62 zope.interface==6.1.0
63 redis==5.0.1
64 async-timeout==4.0.3
22 65 repoze.lru==0.7
23 redis==3.5.3
24 simplejson==3.16.0
25 subprocess32==3.5.4
26 subvertpy==0.10.1
66 scandir==1.10.0
67 setproctitle==1.3.3
68 subvertpy==0.11.0
69 waitress==3.0.0
70 wcwidth==0.2.6
27 71
28 six==1.11.0
29 translationstring==1.3
30 webob==1.8.5
31 zope.deprecation==4.4.0
32 zope.interface==4.6.0
33
34 ## http servers
35 gevent==1.5.0
36 greenlet==0.4.15
37 gunicorn==19.9.0
38 waitress==1.3.1
39
40 ## debug
41 ipdb==0.13.2
42 ipython==5.1.0
43 72
44 73 ## test related requirements
45 -r requirements_test.txt
74 #-r requirements_test.txt
46 75
47 76 ## uncomment to add the debug libraries
48 77 #-r requirements_debug.txt
@@ -1,8 +1,28 b''
1 1 ## special libraries we could extend the requirements.txt file with to add some
2 ## custom libraries useful for debug and memory tracing
3
4 ## uncomment inclusion of this file in requirements.txt run make generate-pkgs and nix-shell
2 ## custom libraries useful for debug and memory tracing
5 3
6 4 objgraph
7 5 memory-profiler
8 6 pympler
7
8 ## debug
9 ipdb
10 ipython
11 rich
12
13 # format
14 flake8
15 ruff
16
17 pipdeptree==2.7.1
18 invoke==2.0.0
19 bumpversion==0.6.0
20 bump2version==1.0.1
21
22 docutils-stubs
23 types-redis
24 types-requests==2.31.0.6
25 types-sqlalchemy
26 types-psutil
27 types-pycurl
28 types-ujson
@@ -1,16 +1,45 b''
1 1 # test related requirements
2 pytest==4.6.5
3 py==1.8.0
4 pytest-cov==2.7.1
5 pytest-sugar==0.9.2
6 pytest-runner==5.1.0
2
3 cov-core==1.15.0
4 coverage==7.2.3
5 mock==5.0.2
6 py==1.11.0
7 pytest-cov==4.0.0
8 coverage==7.2.3
9 pytest==7.3.1
10 attrs==22.2.0
11 iniconfig==2.0.0
12 packaging==23.1
13 pluggy==1.0.0
7 14 pytest-profiling==1.7.0
8 pytest-timeout==1.3.3
9 gprof2dot==2017.9.19
15 gprof2dot==2022.7.29
16 pytest==7.3.1
17 attrs==22.2.0
18 iniconfig==2.0.0
19 packaging==23.1
20 pluggy==1.0.0
21 six==1.16.0
22 pytest-runner==6.0.0
23 pytest-sugar==0.9.7
24 packaging==23.1
25 pytest==7.3.1
26 attrs==22.2.0
27 iniconfig==2.0.0
28 packaging==23.1
29 pluggy==1.0.0
30 termcolor==2.3.0
31 pytest-timeout==2.1.0
32 pytest==7.3.1
33 attrs==22.2.0
34 iniconfig==2.0.0
35 packaging==23.1
36 pluggy==1.0.0
37 webtest==3.0.0
38 beautifulsoup4==4.11.2
39 soupsieve==2.4
40 waitress==3.0.0
41 webob==1.8.7
10 42
11 mock==3.0.5
12 cov-core==1.15.0
13 coverage==4.5.4
14
15 webtest==2.0.34
16 beautifulsoup4==4.6.3
43 # RhodeCode test-data
44 rc_testdata @ https://code.rhodecode.com/upstream/rc-testdata-dist/raw/77378e9097f700b4c1b9391b56199fe63566b5c9/rc_testdata-0.11.0.tar.gz#egg=rc_testdata
45 rc_testdata==0.11.0
@@ -1,1 +1,1 b''
1 4.27.1 No newline at end of file
1 5.0.0 No newline at end of file
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -15,10 +15,23 b''
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 import pkgutil
18 import os
19
20 __version__ = ''
19 21
20 22
21 __version__ = pkgutil.get_data('vcsserver', 'VERSION').strip()
23 def get_version():
24 global __version__
25 if __version__:
26 return __version__
27
28 here = os.path.abspath(os.path.dirname(__file__))
29 ver_file = os.path.join(here, "VERSION")
30 with open(ver_file, "rt") as f:
31 version = f.read().strip()
32
33 __version__ = version
34 return version
22 35
23 36 # link to config for pyramid
24 37 CONFIG = {}
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -16,18 +16,21 b''
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import os
18 18 import sys
19 import traceback
19 import tempfile
20 20 import logging
21 import urlparse
21 import urllib.parse
22
23 from vcsserver.lib.rc_cache.archive_cache import get_archival_cache_store
22 24
23 25 from vcsserver import exceptions
24 26 from vcsserver.exceptions import NoContentException
25 from vcsserver.hgcompat import (archival)
26
27 from vcsserver.hgcompat import archival
28 from vcsserver.str_utils import safe_bytes
29 from vcsserver.lib.exc_tracking import format_exc
27 30 log = logging.getLogger(__name__)
28 31
29 32
30 class RepoFactory(object):
33 class RepoFactory:
31 34 """
32 35 Utility to create instances of repository
33 36
@@ -55,31 +58,33 b' def obfuscate_qs(query_string):'
55 58 return None
56 59
57 60 parsed = []
58 for k, v in urlparse.parse_qsl(query_string, keep_blank_values=True):
61 for k, v in urllib.parse.parse_qsl(query_string, keep_blank_values=True):
59 62 if k in ['auth_token', 'api_key']:
60 63 v = "*****"
61 64 parsed.append((k, v))
62 65
63 66 return '&'.join('{}{}'.format(
64 k, '={}'.format(v) if v else '') for k, v in parsed)
67 k, f'={v}' if v else '') for k, v in parsed)
65 68
66 69
67 def raise_from_original(new_type):
70 def raise_from_original(new_type, org_exc: Exception):
68 71 """
69 72 Raise a new exception type with original args and traceback.
70 73 """
71 exc_type, exc_value, exc_traceback = sys.exc_info()
74 exc_info = sys.exc_info()
75 exc_type, exc_value, exc_traceback = exc_info
72 76 new_exc = new_type(*exc_value.args)
77
73 78 # store the original traceback into the new exc
74 new_exc._org_exc_tb = traceback.format_exc(exc_traceback)
79 new_exc._org_exc_tb = format_exc(exc_info)
75 80
76 81 try:
77 raise new_exc, None, exc_traceback
82 raise new_exc.with_traceback(exc_traceback)
78 83 finally:
79 84 del exc_traceback
80 85
81 86
82 class ArchiveNode(object):
87 class ArchiveNode:
83 88 def __init__(self, path, mode, is_link, raw_bytes):
84 89 self.path = path
85 90 self.mode = mode
@@ -87,34 +92,59 b' class ArchiveNode(object):'
87 92 self.raw_bytes = raw_bytes
88 93
89 94
90 def archive_repo(walker, archive_dest_path, kind, mtime, archive_at_path,
91 archive_dir_name, commit_id, write_metadata=True, extra_metadata=None):
95 def store_archive_in_cache(node_walker, archive_key, kind, mtime, archive_at_path, archive_dir_name,
96 commit_id, write_metadata=True, extra_metadata=None, cache_config=None):
92 97 """
93 walker should be a file walker, for example:
94 def walker():
98 Function that generates an archive and stores it in a dedicated backend store
99 In here we use diskcache
100
101 :param node_walker: a generator returning nodes to add to archive
102 :param archive_key: key used to store the path
103 :param kind: archive kind
104 :param mtime: time of creation
105 :param archive_at_path: default '/' the path at archive was started.
106 If this is not '/' it means it's a partial archive
107 :param archive_dir_name: inside dir name when creating an archive
108 :param commit_id: commit sha of revision archive was created at
109 :param write_metadata:
110 :param extra_metadata:
111 :param cache_config:
112
113 walker should be a file walker, for example,
114 def node_walker():
95 115 for file_info in files:
96 116 yield ArchiveNode(fn, mode, is_link, ctx[fn].data)
97 117 """
98 118 extra_metadata = extra_metadata or {}
99 119
120 d_cache = get_archival_cache_store(config=cache_config)
121
122 if archive_key in d_cache:
123 with d_cache as d_cache_reader:
124 reader, tag = d_cache_reader.get(archive_key, read=True, tag=True, retry=True)
125 return reader.name
126
127 archive_tmp_path = safe_bytes(tempfile.mkstemp()[1])
128 log.debug('Creating new temp archive in %s', archive_tmp_path)
129
100 130 if kind == "tgz":
101 archiver = archival.tarit(archive_dest_path, mtime, "gz")
131 archiver = archival.tarit(archive_tmp_path, mtime, b"gz")
102 132 elif kind == "tbz2":
103 archiver = archival.tarit(archive_dest_path, mtime, "bz2")
133 archiver = archival.tarit(archive_tmp_path, mtime, b"bz2")
104 134 elif kind == 'zip':
105 archiver = archival.zipit(archive_dest_path, mtime)
135 archiver = archival.zipit(archive_tmp_path, mtime)
106 136 else:
107 137 raise exceptions.ArchiveException()(
108 'Remote does not support: "%s" archive type.' % kind)
138 f'Remote does not support: "{kind}" archive type.')
109 139
110 for f in walker(commit_id, archive_at_path):
111 f_path = os.path.join(archive_dir_name, f.path.lstrip('/'))
140 for f in node_walker(commit_id, archive_at_path):
141 f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
112 142 try:
113 143 archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
114 144 except NoContentException:
115 145 # NOTE(marcink): this is a special case for SVN so we can create "empty"
116 # directories which arent supported by archiver
117 archiver.addfile(os.path.join(f_path, '.dir'), f.mode, f.is_link, '')
146 # directories which are not supported by archiver
147 archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')
118 148
119 149 if write_metadata:
120 150 metadata = dict([
@@ -123,8 +153,41 b' def archive_repo(walker, archive_dest_pa'
123 153 ])
124 154 metadata.update(extra_metadata)
125 155
126 meta = ["%s:%s" % (f_name, value) for f_name, value in metadata.items()]
127 f_path = os.path.join(archive_dir_name, '.archival.txt')
128 archiver.addfile(f_path, 0o644, False, '\n'.join(meta))
156 meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
157 f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
158 archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))
159
160 archiver.done()
161
162 # ensure set & get are atomic
163 with d_cache.transact():
164
165 with open(archive_tmp_path, 'rb') as archive_file:
166 add_result = d_cache.set(archive_key, archive_file, read=True, tag='db-name', retry=True)
167 if not add_result:
168 log.error('Failed to store cache for key=%s', archive_key)
169
170 os.remove(archive_tmp_path)
129 171
130 return archiver.done()
172 reader, tag = d_cache.get(archive_key, read=True, tag=True, retry=True)
173 if not reader:
174 raise AssertionError(f'empty reader on key={archive_key} added={add_result}')
175
176 return reader.name
177
178
179 class BinaryEnvelope:
180 def __init__(self, val):
181 self.val = val
182
183
184 class BytesEnvelope(bytes):
185 def __new__(cls, content):
186 if isinstance(content, bytes):
187 return super().__new__(cls, content)
188 else:
189 raise TypeError('BytesEnvelope content= param must be bytes. Use BinaryEnvelope to wrap other types')
190
191
192 class BinaryBytesEnvelope(BytesEnvelope):
193 pass
@@ -1,3 +1,5 b''
1 # Copyright (C) 2014-2023 RhodeCode GmbH
2
1 3 """
2 4 Provides a stub implementation for VCS operations.
3 5
@@ -1,3 +1,5 b''
1 # Copyright (C) 2014-2023 RhodeCode GmbH
2
1 3 """
2 4 Implementation of :class:`EchoApp`.
3 5
@@ -10,7 +12,7 b' import logging'
10 12 log = logging.getLogger(__name__)
11 13
12 14
13 class EchoApp(object):
15 class EchoApp:
14 16
15 17 def __init__(self, repo_path, repo_name, config):
16 18 self._repo_path = repo_path
@@ -23,10 +25,10 b' class EchoApp(object):'
23 25 status = '200 OK'
24 26 headers = [('Content-Type', 'text/plain')]
25 27 start_response(status, headers)
26 return ["ECHO"]
28 return [b"ECHO"]
27 29
28 30
29 class EchoAppStream(object):
31 class EchoAppStream:
30 32
31 33 def __init__(self, repo_path, repo_name, config):
32 34 self._repo_path = repo_path
@@ -41,8 +43,8 b' class EchoAppStream(object):'
41 43 start_response(status, headers)
42 44
43 45 def generator():
44 for _ in xrange(1000000):
45 yield "ECHO"
46 for _ in range(1000000):
47 yield b"ECHO_STREAM"
46 48 return generator()
47 49
48 50
@@ -1,3 +1,5 b''
1 # Copyright (C) 2014-2023 RhodeCode GmbH
2
1 3 """
2 4 Provides the same API as :mod:`remote_wsgi`.
3 5
@@ -13,7 +15,7 b' from vcsserver import wsgi_app_caller'
13 15 log = logging.getLogger(__name__)
14 16
15 17
16 class GitRemoteWsgi(object):
18 class GitRemoteWsgi:
17 19 def handle(self, environ, input_data, *args, **kwargs):
18 20 app = wsgi_app_caller.WSGIAppCaller(
19 21 create_echo_wsgi_app(*args, **kwargs))
@@ -21,7 +23,7 b' class GitRemoteWsgi(object):'
21 23 return app.handle(environ, input_data)
22 24
23 25
24 class HgRemoteWsgi(object):
26 class HgRemoteWsgi:
25 27 def handle(self, environ, input_data, *args, **kwargs):
26 28 app = wsgi_app_caller.WSGIAppCaller(
27 29 create_echo_wsgi_app(*args, **kwargs))
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -109,7 +109,7 b' class HTTPRepoLocked(HTTPLocked):'
109 109 def __init__(self, title, status_code=None, **kwargs):
110 110 self.code = status_code or HTTPLocked.code
111 111 self.title = title
112 super(HTTPRepoLocked, self).__init__(**kwargs)
112 super().__init__(**kwargs)
113 113
114 114
115 115 class HTTPRepoBranchProtected(HTTPForbidden):
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -16,4 +16,4 b''
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 from app import create_app
19 from .app import create_app # noqa
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -17,23 +17,22 b''
17 17
18 18 import re
19 19 import logging
20 from wsgiref.util import FileWrapper
21 20
22 import simplejson as json
23 21 from pyramid.config import Configurator
24 22 from pyramid.response import Response, FileIter
25 23 from pyramid.httpexceptions import (
26 24 HTTPBadRequest, HTTPNotImplemented, HTTPNotFound, HTTPForbidden,
27 25 HTTPUnprocessableEntity)
28 26
27 from vcsserver.lib.rc_json import json
29 28 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
30 29 from vcsserver.git_lfs.utils import safe_result, get_cython_compat_decorator
31 from vcsserver.utils import safe_int
30 from vcsserver.str_utils import safe_int
32 31
33 32 log = logging.getLogger(__name__)
34 33
35 34
36 GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' #+json ?
35 GIT_LFS_CONTENT_TYPE = 'application/vnd.git-lfs' # +json ?
37 36 GIT_LFS_PROTO_PAT = re.compile(r'^/(.+)/(info/lfs/(.+))')
38 37
39 38
@@ -48,7 +47,7 b' def write_response_error(http_exception,'
48 47 return _exception
49 48
50 49
51 class AuthHeaderRequired(object):
50 class AuthHeaderRequired:
52 51 """
53 52 Decorator to check if request has proper auth-header
54 53 """
@@ -95,7 +94,7 b' def lfs_objects_batch(request):'
95 94 if operation not in ('download', 'upload'):
96 95 log.debug('LFS: unsupported operation:%s', operation)
97 96 return write_response_error(
98 HTTPBadRequest, 'unsupported operation mode: `%s`' % operation)
97 HTTPBadRequest, f'unsupported operation mode: `{operation}`')
99 98
100 99 if 'objects' not in data:
101 100 log.debug('LFS: missing objects data')
@@ -115,8 +114,13 b' def lfs_objects_batch(request):'
115 114 HTTPBadRequest, 'unsupported data in objects')
116 115
117 116 obj_data = {'oid': oid}
117 if http_scheme == 'http':
118 # Note(marcink): when using http, we might have a custom port
119 # so we skip setting the scheme explicitly; otherwise url dispatch won't generate a port in the URL
120 # for development we need this
121 http_scheme = None
118 122
119 obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
123 obj_href = request.route_url('lfs_objects_oid', repo=repo, oid=oid,
120 124 _scheme=http_scheme)
121 125 obj_verify_href = request.route_url('lfs_objects_verify', repo=repo,
122 126 _scheme=http_scheme)
@@ -179,7 +183,7 b' def lfs_objects_oid_download(request):'
179 183 if not store.has_oid():
180 184 log.debug('LFS: oid %s does not exists in store', oid)
181 185 return write_response_error(
182 HTTPNotFound, 'requested file with oid `%s` not found in store' % oid)
186 HTTPNotFound, f'requested file with oid `{oid}` not found in store')
183 187
184 188 # TODO(marcink): support range header ?
185 189 # Range: bytes=0-, `bytes=(\d+)\-.*`
@@ -208,11 +212,11 b' def lfs_objects_verify(request):'
208 212 if not store.has_oid():
209 213 log.debug('LFS: oid %s does not exists in store', oid)
210 214 return write_response_error(
211 HTTPNotFound, 'oid `%s` does not exists in store' % oid)
215 HTTPNotFound, f'oid `{oid}` does not exists in store')
212 216
213 217 store_size = store.size_oid()
214 218 if store_size != size:
215 msg = 'requested file size mismatch store size:%s requested:%s' % (
219 msg = 'requested file size mismatch store size:{} requested:{}'.format(
216 220 store_size, size)
217 221 return write_response_error(
218 222 HTTPUnprocessableEntity, msg)
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -23,7 +23,7 b' from collections import OrderedDict'
23 23 log = logging.getLogger(__name__)
24 24
25 25
26 class OidHandler(object):
26 class OidHandler:
27 27
28 28 def __init__(self, store, repo_name, auth, oid, obj_size, obj_data, obj_href,
29 29 obj_verify_href=None):
@@ -51,7 +51,7 b' class OidHandler(object):'
51 51
52 52 if not store.has_oid():
53 53 # error reply back to client that something is wrong with dl
54 err_msg = 'object: {} does not exist in store'.format(store.oid)
54 err_msg = f'object: {store.oid} does not exist in store'
55 55 has_errors = OrderedDict(
56 56 error=OrderedDict(
57 57 code=404,
@@ -113,12 +113,14 b' class OidHandler(object):'
113 113 return handler(*args, **kwargs)
114 114
115 115
116 class LFSOidStore(object):
116 class LFSOidStore:
117 117
118 118 def __init__(self, oid, repo, store_location=None):
119 119 self.oid = oid
120 120 self.repo = repo
121 self.store_path = store_location or self.get_default_store()
121 defined_store_path = store_location or self.get_default_store()
122 self.store_suffix = f"/objects/{oid[:2]}/{oid[2:4]}"
123 self.store_path = f"{defined_store_path.rstrip('/')}{self.store_suffix}"
122 124 self.tmp_oid_path = os.path.join(self.store_path, oid + '.tmp')
123 125 self.oid_path = os.path.join(self.store_path, oid)
124 126 self.fd = None
@@ -130,7 +132,7 b' class LFSOidStore(object):'
130 132 f.write('...')
131 133 """
132 134
133 class StoreEngine(object):
135 class StoreEngine:
134 136 def __init__(self, mode, store_path, oid_path, tmp_oid_path):
135 137 self.mode = mode
136 138 self.store_path = store_path
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -18,9 +18,11 b''
18 18 import os
19 19 import pytest
20 20 from webtest.app import TestApp as WebObTestApp
21 import simplejson as json
22 21
22 from vcsserver.lib.rc_json import json
23 from vcsserver.str_utils import safe_bytes
23 24 from vcsserver.git_lfs.app import create_app
25 from vcsserver.git_lfs.lib import LFSOidStore
24 26
25 27
26 28 @pytest.fixture(scope='function')
@@ -46,7 +48,7 b' def http_auth():'
46 48 return {'HTTP_AUTHORIZATION': "Basic XXXXX"}
47 49
48 50
49 class TestLFSApplication(object):
51 class TestLFSApplication:
50 52
51 53 def test_app_wrong_path(self, git_lfs_app):
52 54 git_lfs_app.get('/repo/info/lfs/xxx', status=404)
@@ -54,19 +56,19 b' class TestLFSApplication(object):'
54 56 def test_app_deprecated_endpoint(self, git_lfs_app):
55 57 response = git_lfs_app.post('/repo/info/lfs/objects', status=501)
56 58 assert response.status_code == 501
57 assert json.loads(response.text) == {u'message': u'LFS: v1 api not supported'}
59 assert json.loads(response.text) == {'message': 'LFS: v1 api not supported'}
58 60
59 61 def test_app_lock_verify_api_not_available(self, git_lfs_app):
60 62 response = git_lfs_app.post('/repo/info/lfs/locks/verify', status=501)
61 63 assert response.status_code == 501
62 64 assert json.loads(response.text) == {
63 u'message': u'GIT LFS locking api not supported'}
65 'message': 'GIT LFS locking api not supported'}
64 66
65 67 def test_app_lock_api_not_available(self, git_lfs_app):
66 68 response = git_lfs_app.post('/repo/info/lfs/locks', status=501)
67 69 assert response.status_code == 501
68 70 assert json.loads(response.text) == {
69 u'message': u'GIT LFS locking api not supported'}
71 'message': 'GIT LFS locking api not supported'}
70 72
71 73 def test_app_batch_api_missing_auth(self, git_lfs_app):
72 74 git_lfs_app.post_json(
@@ -77,14 +79,14 b' class TestLFSApplication(object):'
77 79 '/repo/info/lfs/objects/batch', params={}, status=400,
78 80 extra_environ=http_auth)
79 81 assert json.loads(response.text) == {
80 u'message': u'unsupported operation mode: `None`'}
82 'message': 'unsupported operation mode: `None`'}
81 83
82 84 def test_app_batch_api_missing_objects(self, git_lfs_app, http_auth):
83 85 response = git_lfs_app.post_json(
84 86 '/repo/info/lfs/objects/batch', params={'operation': 'download'},
85 87 status=400, extra_environ=http_auth)
86 88 assert json.loads(response.text) == {
87 u'message': u'missing objects data'}
89 'message': 'missing objects data'}
88 90
89 91 def test_app_batch_api_unsupported_data_in_objects(
90 92 self, git_lfs_app, http_auth):
@@ -94,7 +96,7 b' class TestLFSApplication(object):'
94 96 '/repo/info/lfs/objects/batch', params=params, status=400,
95 97 extra_environ=http_auth)
96 98 assert json.loads(response.text) == {
97 u'message': u'unsupported data in objects'}
99 'message': 'unsupported data in objects'}
98 100
99 101 def test_app_batch_api_download_missing_object(
100 102 self, git_lfs_app, http_auth):
@@ -105,23 +107,23 b' class TestLFSApplication(object):'
105 107 extra_environ=http_auth)
106 108
107 109 expected_objects = [
108 {u'authenticated': True,
109 u'errors': {u'error': {
110 u'code': 404,
111 u'message': u'object: 123 does not exist in store'}},
112 u'oid': u'123',
113 u'size': u'1024'}
110 {'authenticated': True,
111 'errors': {'error': {
112 'code': 404,
113 'message': 'object: 123 does not exist in store'}},
114 'oid': '123',
115 'size': '1024'}
114 116 ]
115 117 assert json.loads(response.text) == {
116 118 'objects': expected_objects, 'transfer': 'basic'}
117 119
118 120 def test_app_batch_api_download(self, git_lfs_app, http_auth):
119 121 oid = '456'
120 oid_path = os.path.join(git_lfs_app._store, oid)
122 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
121 123 if not os.path.isdir(os.path.dirname(oid_path)):
122 124 os.makedirs(os.path.dirname(oid_path))
123 125 with open(oid_path, 'wb') as f:
124 f.write('OID_CONTENT')
126 f.write(safe_bytes('OID_CONTENT'))
125 127
126 128 params = {'operation': 'download',
127 129 'objects': [{'oid': oid, 'size': '1024'}]}
@@ -130,14 +132,14 b' class TestLFSApplication(object):'
130 132 extra_environ=http_auth)
131 133
132 134 expected_objects = [
133 {u'authenticated': True,
134 u'actions': {
135 u'download': {
136 u'header': {u'Authorization': u'Basic XXXXX'},
137 u'href': u'http://localhost/repo/info/lfs/objects/456'},
135 {'authenticated': True,
136 'actions': {
137 'download': {
138 'header': {'Authorization': 'Basic XXXXX'},
139 'href': 'http://localhost/repo/info/lfs/objects/456'},
138 140 },
139 u'oid': u'456',
140 u'size': u'1024'}
141 'oid': '456',
142 'size': '1024'}
141 143 ]
142 144 assert json.loads(response.text) == {
143 145 'objects': expected_objects, 'transfer': 'basic'}
@@ -149,18 +151,18 b' class TestLFSApplication(object):'
149 151 '/repo/info/lfs/objects/batch', params=params,
150 152 extra_environ=http_auth)
151 153 expected_objects = [
152 {u'authenticated': True,
153 u'actions': {
154 u'upload': {
155 u'header': {u'Authorization': u'Basic XXXXX',
156 u'Transfer-Encoding': u'chunked'},
157 u'href': u'http://localhost/repo/info/lfs/objects/123'},
158 u'verify': {
159 u'header': {u'Authorization': u'Basic XXXXX'},
160 u'href': u'http://localhost/repo/info/lfs/verify'}
154 {'authenticated': True,
155 'actions': {
156 'upload': {
157 'header': {'Authorization': 'Basic XXXXX',
158 'Transfer-Encoding': 'chunked'},
159 'href': 'http://localhost/repo/info/lfs/objects/123'},
160 'verify': {
161 'header': {'Authorization': 'Basic XXXXX'},
162 'href': 'http://localhost/repo/info/lfs/verify'}
161 163 },
162 u'oid': u'123',
163 u'size': u'1024'}
164 'oid': '123',
165 'size': '1024'}
164 166 ]
165 167 assert json.loads(response.text) == {
166 168 'objects': expected_objects, 'transfer': 'basic'}
@@ -172,18 +174,18 b' class TestLFSApplication(object):'
172 174 '/repo/info/lfs/objects/batch', params=params,
173 175 extra_environ=http_auth)
174 176 expected_objects = [
175 {u'authenticated': True,
176 u'actions': {
177 u'upload': {
178 u'header': {u'Authorization': u'Basic XXXXX',
179 u'Transfer-Encoding': u'chunked'},
180 u'href': u'https://localhost/repo/info/lfs/objects/123'},
181 u'verify': {
182 u'header': {u'Authorization': u'Basic XXXXX'},
183 u'href': u'https://localhost/repo/info/lfs/verify'}
177 {'authenticated': True,
178 'actions': {
179 'upload': {
180 'header': {'Authorization': 'Basic XXXXX',
181 'Transfer-Encoding': 'chunked'},
182 'href': 'https://localhost/repo/info/lfs/objects/123'},
183 'verify': {
184 'header': {'Authorization': 'Basic XXXXX'},
185 'href': 'https://localhost/repo/info/lfs/verify'}
184 186 },
185 u'oid': u'123',
186 u'size': u'1024'}
187 'oid': '123',
188 'size': '1024'}
187 189 ]
188 190 assert json.loads(response.text) == {
189 191 'objects': expected_objects, 'transfer': 'basic'}
@@ -195,7 +197,7 b' class TestLFSApplication(object):'
195 197 status=400)
196 198
197 199 assert json.loads(response.text) == {
198 u'message': u'missing oid and size in request data'}
200 'message': 'missing oid and size in request data'}
199 201
200 202 def test_app_verify_api_missing_obj(self, git_lfs_app):
201 203 params = {'oid': 'missing', 'size': '1024'}
@@ -204,38 +206,38 b' class TestLFSApplication(object):'
204 206 status=404)
205 207
206 208 assert json.loads(response.text) == {
207 u'message': u'oid `missing` does not exists in store'}
209 'message': 'oid `missing` does not exists in store'}
208 210
209 211 def test_app_verify_api_size_mismatch(self, git_lfs_app):
210 212 oid = 'existing'
211 oid_path = os.path.join(git_lfs_app._store, oid)
213 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
212 214 if not os.path.isdir(os.path.dirname(oid_path)):
213 215 os.makedirs(os.path.dirname(oid_path))
214 216 with open(oid_path, 'wb') as f:
215 f.write('OID_CONTENT')
217 f.write(safe_bytes('OID_CONTENT'))
216 218
217 219 params = {'oid': oid, 'size': '1024'}
218 220 response = git_lfs_app.post_json(
219 221 '/repo/info/lfs/verify', params=params, status=422)
220 222
221 223 assert json.loads(response.text) == {
222 u'message': u'requested file size mismatch '
223 u'store size:11 requested:1024'}
224 'message': 'requested file size mismatch '
225 'store size:11 requested:1024'}
224 226
225 227 def test_app_verify_api(self, git_lfs_app):
226 228 oid = 'existing'
227 oid_path = os.path.join(git_lfs_app._store, oid)
229 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
228 230 if not os.path.isdir(os.path.dirname(oid_path)):
229 231 os.makedirs(os.path.dirname(oid_path))
230 232 with open(oid_path, 'wb') as f:
231 f.write('OID_CONTENT')
233 f.write(safe_bytes('OID_CONTENT'))
232 234
233 235 params = {'oid': oid, 'size': 11}
234 236 response = git_lfs_app.post_json(
235 237 '/repo/info/lfs/verify', params=params)
236 238
237 239 assert json.loads(response.text) == {
238 u'message': {u'size': u'ok', u'in_store': u'ok'}}
240 'message': {'size': 'ok', 'in_store': 'ok'}}
239 241
240 242 def test_app_download_api_oid_not_existing(self, git_lfs_app):
241 243 oid = 'missing'
@@ -244,15 +246,15 b' class TestLFSApplication(object):'
244 246 '/repo/info/lfs/objects/{oid}'.format(oid=oid), status=404)
245 247
246 248 assert json.loads(response.text) == {
247 u'message': u'requested file with oid `missing` not found in store'}
249 'message': 'requested file with oid `missing` not found in store'}
248 250
249 251 def test_app_download_api(self, git_lfs_app):
250 252 oid = 'existing'
251 oid_path = os.path.join(git_lfs_app._store, oid)
253 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
252 254 if not os.path.isdir(os.path.dirname(oid_path)):
253 255 os.makedirs(os.path.dirname(oid_path))
254 256 with open(oid_path, 'wb') as f:
255 f.write('OID_CONTENT')
257 f.write(safe_bytes('OID_CONTENT'))
256 258
257 259 response = git_lfs_app.get(
258 260 '/repo/info/lfs/objects/{oid}'.format(oid=oid))
@@ -264,9 +266,9 b' class TestLFSApplication(object):'
264 266 response = git_lfs_app.put(
265 267 '/repo/info/lfs/objects/{oid}'.format(oid=oid), params='CONTENT')
266 268
267 assert json.loads(response.text) == {u'upload': u'ok'}
269 assert json.loads(response.text) == {'upload': 'ok'}
268 270
269 271 # verify that we actually wrote that OID
270 oid_path = os.path.join(git_lfs_app._store, oid)
272 oid_path = LFSOidStore(oid=oid, repo=None, store_location=git_lfs_app._store).oid_path
271 273 assert os.path.isfile(oid_path)
272 274 assert 'CONTENT' == open(oid_path).read()
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -17,6 +17,7 b''
17 17
18 18 import os
19 19 import pytest
20 from vcsserver.str_utils import safe_bytes
20 21 from vcsserver.git_lfs.lib import OidHandler, LFSOidStore
21 22
22 23
@@ -42,7 +43,7 b' def oid_handler(lfs_store):'
42 43 return oid_handler
43 44
44 45
45 class TestOidHandler(object):
46 class TestOidHandler:
46 47
47 48 @pytest.mark.parametrize('exec_action', [
48 49 'download',
@@ -70,7 +71,7 b' class TestOidHandler(object):'
70 71 os.makedirs(os.path.dirname(store.oid_path))
71 72
72 73 with open(store.oid_path, 'wb') as f:
73 f.write('CONTENT')
74 f.write(safe_bytes('CONTENT'))
74 75
75 76 response, has_errors = oid_handler.exec_operation('download')
76 77
@@ -86,7 +87,7 b' class TestOidHandler(object):'
86 87 os.makedirs(os.path.dirname(store.oid_path))
87 88
88 89 with open(store.oid_path, 'wb') as f:
89 f.write('CONTENT')
90 f.write(safe_bytes('CONTENT'))
90 91 oid_handler.obj_size = 7
91 92 response, has_errors = oid_handler.exec_operation('upload')
92 93 assert has_errors is None
@@ -98,7 +99,7 b' class TestOidHandler(object):'
98 99 os.makedirs(os.path.dirname(store.oid_path))
99 100
100 101 with open(store.oid_path, 'wb') as f:
101 f.write('CONTENT')
102 f.write(safe_bytes('CONTENT'))
102 103
103 104 oid_handler.obj_size = 10240
104 105 response, has_errors = oid_handler.exec_operation('upload')
@@ -119,7 +120,7 b' class TestOidHandler(object):'
119 120 }
120 121
121 122
122 class TestLFSStore(object):
123 class TestLFSStore:
123 124 def test_write_oid(self, lfs_store):
124 125 oid_location = lfs_store.oid_path
125 126
@@ -127,7 +128,7 b' class TestLFSStore(object):'
127 128
128 129 engine = lfs_store.get_engine(mode='wb')
129 130 with engine as f:
130 f.write('CONTENT')
131 f.write(safe_bytes('CONTENT'))
131 132
132 133 assert os.path.isfile(oid_location)
133 134
@@ -136,6 +137,6 b' class TestLFSStore(object):'
136 137 assert lfs_store.has_oid() is False
137 138 engine = lfs_store.get_engine(mode='wb')
138 139 with engine as f:
139 f.write('CONTENT')
140 f.write(safe_bytes('CONTENT'))
140 141
141 assert lfs_store.has_oid() is True No newline at end of file
142 assert lfs_store.has_oid() is True
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -21,8 +21,11 b' Mercurial libs compatibility'
21 21
22 22 import mercurial
23 23 from mercurial import demandimport
24
24 25 # patch demandimport, due to a bug in mercurial where it always triggers
25 26 # demandimport.enable()
27 from vcsserver.str_utils import safe_bytes
28
26 29 demandimport.enable = lambda *args, **kwargs: 1
27 30
28 31 from mercurial import ui
@@ -39,7 +42,8 b' from mercurial import subrepo'
39 42 from mercurial import subrepoutil
40 43 from mercurial import tags as hg_tag
41 44 from mercurial import util as hgutil
42 from mercurial.commands import clone, nullid, pull
45 from mercurial.commands import clone, pull
46 from mercurial.node import nullid
43 47 from mercurial.context import memctx, memfilectx
44 48 from mercurial.error import (
45 49 LookupError, RepoError, RepoLookupError, Abort, InterventionRequired,
@@ -53,7 +57,7 b' from mercurial.encoding import tolocal'
53 57 from mercurial.discovery import findcommonoutgoing
54 58 from mercurial.hg import peer
55 59 from mercurial.httppeer import makepeer
56 from mercurial.util import url as hg_url
60 from mercurial.utils.urlutil import url as hg_url
57 61 from mercurial.scmutil import revrange, revsymbol
58 62 from mercurial.node import nullrev
59 63 from mercurial import exchange
@@ -63,17 +67,26 b' from hgext import largefiles'
64 68 # infinite looping when given invalid resources
64 68 from mercurial.url import httpbasicauthhandler, httpdigestauthhandler
65 69
70 # hg strip is in core now
71 from mercurial import strip as hgext_strip
72
66 73
67 74 def get_ctx(repo, ref):
75 if not isinstance(ref, int):
76 ref = safe_bytes(ref)
77
68 78 try:
69 79 ctx = repo[ref]
80 return ctx
70 81 except (ProgrammingError, TypeError):
71 82 # we're unable to find the rev using a regular lookup, so we fall back
72 83 # to the slower, but backward-compatible, revsymbol usage
73 ctx = revsymbol(repo, ref)
84 pass
74 85 except (LookupError, RepoLookupError):
75 86 # Similar case as above but only for refs that are not numeric
76 if isinstance(ref, (int, long)):
87 if isinstance(ref, int):
77 88 raise
78 ctx = revsymbol(repo, ref)
89
90 ctx = revsymbol(repo, ref)
91
79 92 return ctx
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -62,7 +62,7 b' def _dynamic_capabilities_wrapper(lfprot'
62 62
63 63 def patch_subrepo_type_mapping():
64 64 from collections import defaultdict
65 from hgcompat import subrepo, subrepoutil
65 from .hgcompat import subrepo, subrepoutil
66 66 from vcsserver.exceptions import SubrepoMergeException
67 67
68 68 class NoOpSubrepo(subrepo.abstractsubrepo):
@@ -1,7 +1,5 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
@@ -25,9 +23,23 b' import logging'
25 23 import pkg_resources
26 24
27 25 import vcsserver
26 from vcsserver.str_utils import safe_bytes
28 27
29 28 log = logging.getLogger(__name__)
30 29
30 HOOKS_DIR_MODE = 0o755
31 HOOKS_FILE_MODE = 0o755
32
33
34 def set_permissions_if_needed(path_to_check, perms: oct):
35 # Get current permissions
36 current_permissions = os.stat(path_to_check).st_mode & 0o777 # Extract permission bits
37
38 # Check if current permissions are lower than required
39 if current_permissions < int(perms):
40 # Change the permissions if they are lower than required
41 os.chmod(path_to_check, perms)
42
31 43
32 44 def get_git_hooks_path(repo_path, bare):
33 45 hooks_path = os.path.join(repo_path, 'hooks')
@@ -42,14 +54,19 b' def install_git_hooks(repo_path, bare, e'
42 54 Creates a RhodeCode hook inside a git repository
43 55
44 56 :param repo_path: path to repository
57 :param bare: defines if repository is considered a bare git repo
45 58 :param executable: binary executable to put in the hooks
46 :param force_create: Create even if same name hook exists
59 :param force_create: Creates even if the same name hook exists
47 60 """
48 61 executable = executable or sys.executable
49 62 hooks_path = get_git_hooks_path(repo_path, bare)
50 63
51 if not os.path.isdir(hooks_path):
52 os.makedirs(hooks_path, mode=0o777)
64 # we always call it to ensure dir exists and it has a proper mode
65 if not os.path.exists(hooks_path):
66 # If it doesn't exist, create a new directory with the specified mode
67 os.makedirs(hooks_path, mode=HOOKS_DIR_MODE, exist_ok=True)
68 # If it exists, change the directory's mode to the specified mode
69 set_permissions_if_needed(hooks_path, perms=HOOKS_DIR_MODE)
53 70
54 71 tmpl_post = pkg_resources.resource_string(
55 72 'vcsserver', '/'.join(
@@ -63,21 +80,20 b' def install_git_hooks(repo_path, bare, e'
63 80
64 81 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
65 82 log.debug('Installing git hook in repo %s', repo_path)
66 _hook_file = os.path.join(hooks_path, '%s-receive' % h_type)
83 _hook_file = os.path.join(hooks_path, f'{h_type}-receive')
67 84 _rhodecode_hook = check_rhodecode_hook(_hook_file)
68 85
69 86 if _rhodecode_hook or force_create:
70 87 log.debug('writing git %s hook file at %s !', h_type, _hook_file)
71 88 try:
72 89 with open(_hook_file, 'wb') as f:
73 template = template.replace(
74 '_TMPL_', vcsserver.__version__)
75 template = template.replace('_DATE_', timestamp)
76 template = template.replace('_ENV_', executable)
77 template = template.replace('_PATH_', path)
90 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
91 template = template.replace(b'_DATE_', safe_bytes(timestamp))
92 template = template.replace(b'_ENV_', safe_bytes(executable))
93 template = template.replace(b'_PATH_', safe_bytes(path))
78 94 f.write(template)
79 os.chmod(_hook_file, 0o755)
80 except IOError:
95 set_permissions_if_needed(_hook_file, perms=HOOKS_FILE_MODE)
96 except OSError:
81 97 log.exception('error writing hook file %s', _hook_file)
82 98 else:
83 99 log.debug('skipping writing hook file')
@@ -102,7 +118,7 b' def install_svn_hooks(repo_path, executa'
102 118 executable = executable or sys.executable
103 119 hooks_path = get_svn_hooks_path(repo_path)
104 120 if not os.path.isdir(hooks_path):
105 os.makedirs(hooks_path, mode=0o777)
121 os.makedirs(hooks_path, mode=0o777, exist_ok=True)
106 122
107 123 tmpl_post = pkg_resources.resource_string(
108 124 'vcsserver', '/'.join(
@@ -116,7 +132,7 b' def install_svn_hooks(repo_path, executa'
116 132
117 133 for h_type, template in [('pre', tmpl_pre), ('post', tmpl_post)]:
118 134 log.debug('Installing svn hook in repo %s', repo_path)
119 _hook_file = os.path.join(hooks_path, '%s-commit' % h_type)
135 _hook_file = os.path.join(hooks_path, f'{h_type}-commit')
120 136 _rhodecode_hook = check_rhodecode_hook(_hook_file)
121 137
122 138 if _rhodecode_hook or force_create:
@@ -124,15 +140,14 b' def install_svn_hooks(repo_path, executa'
124 140
125 141 try:
126 142 with open(_hook_file, 'wb') as f:
127 template = template.replace(
128 '_TMPL_', vcsserver.__version__)
129 template = template.replace('_DATE_', timestamp)
130 template = template.replace('_ENV_', executable)
131 template = template.replace('_PATH_', path)
143 template = template.replace(b'_TMPL_', safe_bytes(vcsserver.get_version()))
144 template = template.replace(b'_DATE_', safe_bytes(timestamp))
145 template = template.replace(b'_ENV_', safe_bytes(executable))
146 template = template.replace(b'_PATH_', safe_bytes(path))
132 147
133 148 f.write(template)
134 149 os.chmod(_hook_file, 0o755)
135 except IOError:
150 except OSError:
136 151 log.exception('error writing hook file %s', _hook_file)
137 152 else:
138 153 log.debug('skipping writing hook file')
@@ -141,16 +156,16 b' def install_svn_hooks(repo_path, executa'
141 156
142 157
143 158 def get_version_from_hook(hook_path):
144 version = ''
159 version = b''
145 160 hook_content = read_hook_content(hook_path)
146 matches = re.search(r'(?:RC_HOOK_VER)\s*=\s*(.*)', hook_content)
161 matches = re.search(rb'RC_HOOK_VER\s*=\s*(.*)', hook_content)
147 162 if matches:
148 163 try:
149 164 version = matches.groups()[0]
150 165 log.debug('got version %s from hooks.', version)
151 166 except Exception:
152 167 log.exception("Exception while reading the hook version.")
153 return version.replace("'", "")
168 return version.replace(b"'", b"")
154 169
155 170
156 171 def check_rhodecode_hook(hook_path):
@@ -169,8 +184,8 b' def check_rhodecode_hook(hook_path):'
169 184 return False
170 185
171 186
172 def read_hook_content(hook_path):
173 content = ''
187 def read_hook_content(hook_path) -> bytes:
188 content = b''
174 189 if os.path.isfile(hook_path):
175 190 with open(hook_path, 'rb') as f:
176 191 content = f.read()
@@ -11,7 +11,7 b' try:'
11 11 except ImportError:
12 12 if os.environ.get('RC_DEBUG_GIT_HOOK'):
13 13 import traceback
14 print traceback.format_exc()
14 print(traceback.format_exc())
15 15 hooks = None
16 16
17 17
@@ -42,7 +42,7 b' def main():'
42 42 # TODO: johbo: Improve handling of this special case
43 43 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
44 44 raise
45 print 'ERROR:', error
45 print(f'ERROR: {error}')
46 46 sys.exit(1)
47 47 sys.exit(0)
48 48
@@ -11,7 +11,7 b' try:'
11 11 except ImportError:
12 12 if os.environ.get('RC_DEBUG_GIT_HOOK'):
13 13 import traceback
14 print traceback.format_exc()
14 print(traceback.format_exc())
15 15 hooks = None
16 16
17 17
@@ -42,7 +42,7 b' def main():'
42 42 # TODO: johbo: Improve handling of this special case
43 43 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
44 44 raise
45 print 'ERROR:', error
45 print(f'ERROR: {error}')
46 46 sys.exit(1)
47 47 sys.exit(0)
48 48
@@ -12,7 +12,7 b' try:'
12 12 except ImportError:
13 13 if os.environ.get('RC_DEBUG_SVN_HOOK'):
14 14 import traceback
15 print traceback.format_exc()
15 print(traceback.format_exc())
16 16 hooks = None
17 17
18 18
@@ -40,7 +40,7 b' def main():'
40 40 # TODO: johbo: Improve handling of this special case
41 41 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
42 42 raise
43 print 'ERROR:', error
43 print(f'ERROR: {error}')
44 44 sys.exit(1)
45 45 sys.exit(0)
46 46
@@ -12,7 +12,7 b' try:'
12 12 except ImportError:
13 13 if os.environ.get('RC_DEBUG_SVN_HOOK'):
14 14 import traceback
15 print traceback.format_exc()
15 print(traceback.format_exc())
16 16 hooks = None
17 17
18 18
@@ -43,7 +43,7 b' def main():'
43 43 # TODO: johbo: Improve handling of this special case
44 44 if not getattr(error, '_vcs_kind', None) == 'repo_locked':
45 45 raise
46 print 'ERROR:', error
46 print(f'ERROR: {error}')
47 47 sys.exit(1)
48 48 sys.exit(0)
49 49
@@ -1,7 +1,5 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
@@ -22,71 +20,102 b' import os'
22 20 import sys
23 21 import logging
24 22 import collections
25 import importlib
26 23 import base64
24 import msgpack
25 import dataclasses
26 import pygit2
27 27
28 from httplib import HTTPConnection
29
28 import http.client
29 from celery import Celery
30 30
31 31 import mercurial.scmutil
32 32 import mercurial.node
33 import simplejson as json
34 33
34 from vcsserver.lib.rc_json import json
35 35 from vcsserver import exceptions, subprocessio, settings
36 from vcsserver.str_utils import ascii_str, safe_str
37 from vcsserver.remote.git_remote import Repository
36 38
39 celery_app = Celery('__vcsserver__')
37 40 log = logging.getLogger(__name__)
38 41
39 42
40 class HooksHttpClient(object):
43 class HooksHttpClient:
44 proto = 'msgpack.v1'
41 45 connection = None
42 46
43 47 def __init__(self, hooks_uri):
44 48 self.hooks_uri = hooks_uri
45 49
50 def __repr__(self):
51 return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})'
52
46 53 def __call__(self, method, extras):
47 connection = HTTPConnection(self.hooks_uri)
48 body = self._serialize(method, extras)
49 try:
50 connection.request('POST', '/', body)
51 except Exception:
52 log.error('Hooks calling Connection failed on %s', connection.__dict__)
53 raise
54 response = connection.getresponse()
55
56 response_data = response.read()
54 connection = http.client.HTTPConnection(self.hooks_uri)
55 # binary msgpack body
56 headers, body = self._serialize(method, extras)
57 log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri)
57 58
58 59 try:
59 return json.loads(response_data)
60 except Exception:
61 log.exception('Failed to decode hook response json data. '
62 'response_code:%s, raw_data:%s',
63 response.status, response_data)
64 raise
60 try:
61 connection.request('POST', '/', body, headers)
62 except Exception as error:
63 log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error)
64 raise
65 65
66 def _serialize(self, hook_name, extras):
66 response = connection.getresponse()
67 try:
68 return msgpack.load(response)
69 except Exception:
70 response_data = response.read()
71 log.exception('Failed to decode hook response json data. '
72 'response_code:%s, raw_data:%s',
73 response.status, response_data)
74 raise
75 finally:
76 connection.close()
77
78 @classmethod
79 def _serialize(cls, hook_name, extras):
67 80 data = {
68 81 'method': hook_name,
69 82 'extras': extras
70 83 }
71 return json.dumps(data)
84 headers = {
85 "rc-hooks-protocol": cls.proto,
86 "Connection": "keep-alive"
87 }
88 return headers, msgpack.packb(data)
72 89
73 90
74 class HooksDummyClient(object):
75 def __init__(self, hooks_module):
76 self._hooks_module = importlib.import_module(hooks_module)
91 class HooksCeleryClient:
92 TASK_TIMEOUT = 60 # time in seconds
77 93
78 def __call__(self, hook_name, extras):
79 with self._hooks_module.Hooks() as hooks:
80 return getattr(hooks, hook_name)(extras)
94 def __init__(self, queue, backend):
95 celery_app.config_from_object({
96 'broker_url': queue, 'result_backend': backend,
97 'broker_connection_retry_on_startup': True,
98 'task_serializer': 'msgpack',
99 'accept_content': ['json', 'msgpack'],
100 'result_serializer': 'msgpack',
101 'result_accept_content': ['json', 'msgpack']
102 })
103 self.celery_app = celery_app
104
105 def __call__(self, method, extras):
106 inquired_task = self.celery_app.signature(
107 f'rhodecode.lib.celerylib.tasks.{method}'
108 )
109 return inquired_task.delay(extras).get(timeout=self.TASK_TIMEOUT)
81 110
82 111
83 class HooksShadowRepoClient(object):
112 class HooksShadowRepoClient:
84 113
85 114 def __call__(self, hook_name, extras):
86 115 return {'output': '', 'status': 0}
87 116
88 117
89 class RemoteMessageWriter(object):
118 class RemoteMessageWriter:
90 119 """Writer base class."""
91 120 def write(self, message):
92 121 raise NotImplementedError()
@@ -98,7 +127,7 b' class HgMessageWriter(RemoteMessageWrite'
98 127 def __init__(self, ui):
99 128 self.ui = ui
100 129
101 def write(self, message):
130 def write(self, message: str):
102 131 # TODO: Check why the quiet flag is set by default.
103 132 old = self.ui.quiet
104 133 self.ui.quiet = False
@@ -112,8 +141,8 b' class GitMessageWriter(RemoteMessageWrit'
112 141 def __init__(self, stdout=None):
113 142 self.stdout = stdout or sys.stdout
114 143
115 def write(self, message):
116 self.stdout.write(message.encode('utf-8'))
144 def write(self, message: str):
145 self.stdout.write(message)
117 146
118 147
119 148 class SvnMessageWriter(RemoteMessageWriter):
@@ -130,6 +159,7 b' class SvnMessageWriter(RemoteMessageWrit'
130 159 def _handle_exception(result):
131 160 exception_class = result.get('exception')
132 161 exception_traceback = result.get('exception_traceback')
162 log.debug('Handling hook-call exception: %s', exception_class)
133 163
134 164 if exception_traceback:
135 165 log.error('Got traceback from remote call:%s', exception_traceback)
@@ -141,19 +171,25 b' def _handle_exception(result):'
141 171 elif exception_class == 'RepositoryError':
142 172 raise exceptions.VcsException()(*result['exception_args'])
143 173 elif exception_class:
144 raise Exception('Got remote exception "%s" with args "%s"' %
145 (exception_class, result['exception_args']))
174 raise Exception(
175 f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
176 )
146 177
147 178
148 179 def _get_hooks_client(extras):
149 180 hooks_uri = extras.get('hooks_uri')
181 task_queue = extras.get('task_queue')
182 task_backend = extras.get('task_backend')
150 183 is_shadow_repo = extras.get('is_shadow_repo')
184
151 185 if hooks_uri:
152 return HooksHttpClient(extras['hooks_uri'])
186 return HooksHttpClient(hooks_uri)
187 elif task_queue and task_backend:
188 return HooksCeleryClient(task_queue, task_backend)
153 189 elif is_shadow_repo:
154 190 return HooksShadowRepoClient()
155 191 else:
156 return HooksDummyClient(extras['hooks_module'])
192 raise Exception("Hooks client not found!")
157 193
158 194
159 195 def _call_hook(hook_name, extras, writer):
@@ -161,7 +197,6 b' def _call_hook(hook_name, extras, writer'
161 197 log.debug('Hooks, using client:%s', hooks_client)
162 198 result = hooks_client(hook_name, extras)
163 199 log.debug('Hooks got result: %s', result)
164
165 200 _handle_exception(result)
166 201 writer.write(result['output'])
167 202
@@ -169,7 +204,7 b' def _call_hook(hook_name, extras, writer'
169 204
170 205
171 206 def _extras_from_ui(ui):
172 hook_data = ui.config('rhodecode', 'RC_SCM_DATA')
207 hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
173 208 if not hook_data:
174 209 # maybe it's inside environ ?
175 210 env_hook_data = os.environ.get('RC_SCM_DATA')
@@ -192,8 +227,8 b' def _rev_range_hash(repo, node, check_he'
192 227 for rev in range(start, end):
193 228 revs.append(rev)
194 229 ctx = get_ctx(repo, rev)
195 commit_id = mercurial.node.hex(ctx.node())
196 branch = ctx.branch()
230 commit_id = ascii_str(mercurial.node.hex(ctx.node()))
231 branch = safe_str(ctx.branch())
197 232 commits.append((commit_id, branch))
198 233
199 234 parent_heads = []
@@ -217,9 +252,9 b' def _check_heads(repo, start, end, commi'
217 252 for p in parents:
218 253 branch = get_ctx(repo, p).branch()
219 254 # The heads descending from that parent, on the same branch
220 parent_heads = set([p])
221 reachable = set([p])
222 for x in xrange(p + 1, end):
255 parent_heads = {p}
256 reachable = {p}
257 for x in range(p + 1, end):
223 258 if get_ctx(repo, x).branch() != branch:
224 259 continue
225 260 for pp in changelog.parentrevs(x):
@@ -295,14 +330,16 b' def pre_push(ui, repo, node=None, **kwar'
295 330 detect_force_push = extras.get('detect_force_push')
296 331
297 332 rev_data = []
298 if node and kwargs.get('hooktype') == 'pretxnchangegroup':
333 hook_type: str = safe_str(kwargs.get('hooktype'))
334
335 if node and hook_type == 'pretxnchangegroup':
299 336 branches = collections.defaultdict(list)
300 337 commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
301 338 for commit_id, branch in commits:
302 339 branches[branch].append(commit_id)
303 340
304 341 for branch, commits in branches.items():
305 old_rev = kwargs.get('node_last') or commits[0]
342 old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
306 343 rev_data.append({
307 344 'total_commits': len(commits),
308 345 'old_rev': old_rev,
@@ -319,10 +356,10 b' def pre_push(ui, repo, node=None, **kwar'
319 356 extras.get('repo_store', ''), extras.get('repository', ''))
320 357 push_ref['hg_env'] = _get_hg_env(
321 358 old_rev=push_ref['old_rev'],
322 new_rev=push_ref['new_rev'], txnid=kwargs.get('txnid'),
359 new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
323 360 repo_path=repo_path)
324 361
325 extras['hook_type'] = kwargs.get('hooktype', 'pre_push')
362 extras['hook_type'] = hook_type or 'pre_push'
326 363 extras['commit_ids'] = rev_data
327 364
328 365 return _call_hook('pre_push', extras, HgMessageWriter(ui))
@@ -363,6 +400,7 b' def post_push(ui, repo, node, **kwargs):'
363 400 branches = []
364 401 bookmarks = []
365 402 tags = []
403 hook_type: str = safe_str(kwargs.get('hooktype'))
366 404
367 405 commits, _heads = _rev_range_hash(repo, node)
368 406 for commit_id, branch in commits:
@@ -370,11 +408,12 b' def post_push(ui, repo, node, **kwargs):'
370 408 if branch not in branches:
371 409 branches.append(branch)
372 410
373 if hasattr(ui, '_rc_pushkey_branches'):
374 bookmarks = ui._rc_pushkey_branches
411 if hasattr(ui, '_rc_pushkey_bookmarks'):
412 bookmarks = ui._rc_pushkey_bookmarks
375 413
376 extras['hook_type'] = kwargs.get('hooktype', 'post_push')
414 extras['hook_type'] = hook_type or 'post_push'
377 415 extras['commit_ids'] = commit_ids
416
378 417 extras['new_refs'] = {
379 418 'branches': branches,
380 419 'bookmarks': bookmarks,
@@ -395,9 +434,10 b' def post_push_ssh(ui, repo, node, **kwar'
395 434
396 435 def key_push(ui, repo, **kwargs):
397 436 from vcsserver.hgcompat import get_ctx
398 if kwargs['new'] != '0' and kwargs['namespace'] == 'bookmarks':
437
438 if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
399 439 # store new bookmarks in our UI object propagated later to post_push
400 ui._rc_pushkey_branches = get_ctx(repo, kwargs['key']).bookmarks()
440 ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
401 441 return
402 442
403 443
@@ -426,10 +466,13 b' def handle_git_post_receive(unused_repo_'
426 466 pass
427 467
428 468
429 HookResponse = collections.namedtuple('HookResponse', ('status', 'output'))
469 @dataclasses.dataclass
470 class HookResponse:
471 status: int
472 output: str
430 473
431 474
432 def git_pre_pull(extras):
475 def git_pre_pull(extras) -> HookResponse:
433 476 """
434 477 Pre pull hook.
435 478
@@ -439,20 +482,23 b' def git_pre_pull(extras):'
439 482 :return: status code of the hook. 0 for success.
440 483 :rtype: int
441 484 """
485
442 486 if 'pull' not in extras['hooks']:
443 487 return HookResponse(0, '')
444 488
445 stdout = io.BytesIO()
489 stdout = io.StringIO()
446 490 try:
447 status = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
491 status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
492
448 493 except Exception as error:
449 status = 128
450 stdout.write('ERROR: %s\n' % str(error))
494 log.exception('Failed to call pre_pull hook')
495 status_code = 128
496 stdout.write(f'ERROR: {error}\n')
451 497
452 return HookResponse(status, stdout.getvalue())
498 return HookResponse(status_code, stdout.getvalue())
453 499
454 500
455 def git_post_pull(extras):
501 def git_post_pull(extras) -> HookResponse:
456 502 """
457 503 Post pull hook.
458 504
@@ -465,12 +511,12 b' def git_post_pull(extras):'
465 511 if 'pull' not in extras['hooks']:
466 512 return HookResponse(0, '')
467 513
468 stdout = io.BytesIO()
514 stdout = io.StringIO()
469 515 try:
470 516 status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
471 517 except Exception as error:
472 518 status = 128
473 stdout.write('ERROR: %s\n' % error)
519 stdout.write(f'ERROR: {error}\n')
474 520
475 521 return HookResponse(status, stdout.getvalue())
476 522
@@ -495,15 +541,11 b' def _parse_git_ref_lines(revision_lines)'
495 541 return rev_data
496 542
497 543
498 def git_pre_receive(unused_repo_path, revision_lines, env):
544 def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
499 545 """
500 546 Pre push hook.
501 547
502 :param extras: dictionary containing the keys defined in simplevcs
503 :type extras: dict
504
505 548 :return: status code of the hook. 0 for success.
506 :rtype: int
507 549 """
508 550 extras = json.loads(env['RC_SCM_DATA'])
509 551 rev_data = _parse_git_ref_lines(revision_lines)
@@ -527,7 +569,7 b' def git_pre_receive(unused_repo_path, re'
527 569 if type_ == 'heads' and not (new_branch or delete_branch):
528 570 old_rev = push_ref['old_rev']
529 571 new_rev = push_ref['new_rev']
530 cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, '^{}'.format(new_rev)]
572 cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, f'^{new_rev}']
531 573 stdout, stderr = subprocessio.run_command(
532 574 cmd, env=os.environ.copy())
533 575 # means we're having some non-reachable objects, this forced push was used
@@ -536,18 +578,18 b' def git_pre_receive(unused_repo_path, re'
536 578
537 579 extras['hook_type'] = 'pre_receive'
538 580 extras['commit_ids'] = rev_data
539 return _call_hook('pre_push', extras, GitMessageWriter())
581
582 stdout = sys.stdout
583 status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
584
585 return status_code
540 586
541 587
542 def git_post_receive(unused_repo_path, revision_lines, env):
588 def git_post_receive(unused_repo_path, revision_lines, env) -> int:
543 589 """
544 590 Post push hook.
545 591
546 :param extras: dictionary containing the keys defined in simplevcs
547 :type extras: dict
548
549 592 :return: status code of the hook. 0 for success.
550 :rtype: int
551 593 """
552 594 extras = json.loads(env['RC_SCM_DATA'])
553 595 if 'push' not in extras['hooks']:
@@ -567,26 +609,28 b' def git_post_receive(unused_repo_path, r'
567 609 type_ = push_ref['type']
568 610
569 611 if type_ == 'heads':
612 # starting new branch case
570 613 if push_ref['old_rev'] == empty_commit_id:
571 # starting new branch case
572 if push_ref['name'] not in branches:
573 branches.append(push_ref['name'])
614 push_ref_name = push_ref['name']
615
616 if push_ref_name not in branches:
617 branches.append(push_ref_name)
574 618
575 # Fix up head revision if needed
576 cmd = [settings.GIT_EXECUTABLE, 'show', 'HEAD']
577 try:
578 subprocessio.run_command(cmd, env=os.environ.copy())
579 except Exception:
580 cmd = [settings.GIT_EXECUTABLE, 'symbolic-ref', 'HEAD',
581 'refs/heads/%s' % push_ref['name']]
582 print("Setting default branch to %s" % push_ref['name'])
583 subprocessio.run_command(cmd, env=os.environ.copy())
619 need_head_set = ''
620 with Repository(os.getcwd()) as repo:
621 try:
622 repo.head
623 except pygit2.GitError:
624 need_head_set = f'refs/heads/{push_ref_name}'
584 625
585 cmd = [settings.GIT_EXECUTABLE, 'for-each-ref',
586 '--format=%(refname)', 'refs/heads/*']
626 if need_head_set:
627 repo.set_head(need_head_set)
628 print(f"Setting default branch to {push_ref_name}")
629
630 cmd = [settings.GIT_EXECUTABLE, 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
587 631 stdout, stderr = subprocessio.run_command(
588 632 cmd, env=os.environ.copy())
589 heads = stdout
633 heads = safe_str(stdout)
590 634 heads = heads.replace(push_ref['ref'], '')
591 635 heads = ' '.join(head for head
592 636 in heads.splitlines() if head) or '.'
@@ -595,24 +639,43 b' def git_post_receive(unused_repo_path, r'
595 639 '--not', heads]
596 640 stdout, stderr = subprocessio.run_command(
597 641 cmd, env=os.environ.copy())
598 git_revs.extend(stdout.splitlines())
642 git_revs.extend(list(map(ascii_str, stdout.splitlines())))
643
644 # delete branch case
599 645 elif push_ref['new_rev'] == empty_commit_id:
600 # delete branch case
601 git_revs.append('delete_branch=>%s' % push_ref['name'])
646 git_revs.append(f'delete_branch=>{push_ref["name"]}')
602 647 else:
603 648 if push_ref['name'] not in branches:
604 649 branches.append(push_ref['name'])
605 650
606 651 cmd = [settings.GIT_EXECUTABLE, 'log',
607 '{old_rev}..{new_rev}'.format(**push_ref),
652 f'{push_ref["old_rev"]}..{push_ref["new_rev"]}',
608 653 '--reverse', '--pretty=format:%H']
609 654 stdout, stderr = subprocessio.run_command(
610 655 cmd, env=os.environ.copy())
611 git_revs.extend(stdout.splitlines())
656 # we get bytes from stdout, we need str to be consistent
657 log_revs = list(map(ascii_str, stdout.splitlines()))
658 git_revs.extend(log_revs)
659
660 # Pure pygit2 impl. but still 2-3x slower :/
661 # results = []
662 #
663 # with Repository(os.getcwd()) as repo:
664 # repo_new_rev = repo[push_ref['new_rev']]
665 # repo_old_rev = repo[push_ref['old_rev']]
666 # walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
667 #
668 # for commit in walker:
669 # if commit.id == repo_old_rev.id:
670 # break
671 # results.append(commit.id.hex)
672 # # reverse the order, can't use GIT_SORT_REVERSE
673 # log_revs = results[::-1]
674
612 675 elif type_ == 'tags':
613 676 if push_ref['name'] not in tags:
614 677 tags.append(push_ref['name'])
615 git_revs.append('tag=>%s' % push_ref['name'])
678 git_revs.append(f'tag=>{push_ref["name"]}')
616 679
617 680 extras['hook_type'] = 'post_receive'
618 681 extras['commit_ids'] = git_revs
@@ -622,13 +685,16 b' def git_post_receive(unused_repo_path, r'
622 685 'tags': tags,
623 686 }
624 687
688 stdout = sys.stdout
689
625 690 if 'repo_size' in extras['hooks']:
626 691 try:
627 _call_hook('repo_size', extras, GitMessageWriter())
628 except:
692 _call_hook('repo_size', extras, GitMessageWriter(stdout))
693 except Exception:
629 694 pass
630 695
631 return _call_hook('post_push', extras, GitMessageWriter())
696 status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
697 return status_code
632 698
633 699
634 700 def _get_extras_from_txn_id(path, txn_id):
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -15,28 +15,76 b''
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 import io
18 19 import os
20 import platform
19 21 import sys
20 import base64
21 22 import locale
22 23 import logging
23 24 import uuid
25 import time
24 26 import wsgiref.util
25 import traceback
26 27 import tempfile
27 28 import psutil
29
28 30 from itertools import chain
29 from cStringIO import StringIO
30 31
31 import simplejson as json
32 32 import msgpack
33 import configparser
34
33 35 from pyramid.config import Configurator
34 from pyramid.settings import asbool, aslist
35 36 from pyramid.wsgi import wsgiapp
36 from pyramid.compat import configparser
37 37 from pyramid.response import Response
38 38
39 from vcsserver.utils import safe_int
39 from vcsserver.base import BytesEnvelope, BinaryEnvelope
40 from vcsserver.lib.rc_json import json
41 from vcsserver.config.settings_maker import SettingsMaker
42 from vcsserver.str_utils import safe_int
43 from vcsserver.lib.statsd_client import StatsdClient
44 from vcsserver.tweens.request_wrapper import get_headers_call_context
45
46 import vcsserver
47 from vcsserver import remote_wsgi, scm_app, settings, hgpatches
48 from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
49 from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
50 from vcsserver.echo_stub.echo_app import EchoApp
51 from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
52 from vcsserver.lib.exc_tracking import store_exception, format_exc
53 from vcsserver.server import VcsServer
54
55 strict_vcs = True
56
57 git_import_err = None
58 try:
59 from vcsserver.remote.git_remote import GitFactory, GitRemote
60 except ImportError as e:
61 GitFactory = None
62 GitRemote = None
63 git_import_err = e
64 if strict_vcs:
65 raise
66
67
68 hg_import_err = None
69 try:
70 from vcsserver.remote.hg_remote import MercurialFactory, HgRemote
71 except ImportError as e:
72 MercurialFactory = None
73 HgRemote = None
74 hg_import_err = e
75 if strict_vcs:
76 raise
77
78
79 svn_import_err = None
80 try:
81 from vcsserver.remote.svn_remote import SubversionFactory, SvnRemote
82 except ImportError as e:
83 SubversionFactory = None
84 SvnRemote = None
85 svn_import_err = e
86 if strict_vcs:
87 raise
40 88
41 89 log = logging.getLogger(__name__)
42 90
@@ -50,73 +98,12 b' except locale.Error as e:'
50 98 'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
51 99 os.environ['LC_ALL'] = 'C'
52 100
53 import vcsserver
54 from vcsserver import remote_wsgi, scm_app, settings, hgpatches
55 from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
56 from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
57 from vcsserver.echo_stub.echo_app import EchoApp
58 from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
59 from vcsserver.lib.exc_tracking import store_exception
60 from vcsserver.server import VcsServer
61
62 try:
63 from vcsserver.git import GitFactory, GitRemote
64 except ImportError:
65 GitFactory = None
66 GitRemote = None
67
68 try:
69 from vcsserver.hg import MercurialFactory, HgRemote
70 except ImportError:
71 MercurialFactory = None
72 HgRemote = None
73
74 try:
75 from vcsserver.svn import SubversionFactory, SvnRemote
76 except ImportError:
77 SubversionFactory = None
78 SvnRemote = None
79
80 101
81 102 def _is_request_chunked(environ):
82 103 stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
83 104 return stream
84 105
85 106
86 def _int_setting(settings, name, default):
87 settings[name] = int(settings.get(name, default))
88 return settings[name]
89
90
91 def _bool_setting(settings, name, default):
92 input_val = settings.get(name, default)
93 if isinstance(input_val, unicode):
94 input_val = input_val.encode('utf8')
95 settings[name] = asbool(input_val)
96 return settings[name]
97
98
99 def _list_setting(settings, name, default):
100 raw_value = settings.get(name, default)
101
102 # Otherwise we assume it uses pyramids space/newline separation.
103 settings[name] = aslist(raw_value)
104 return settings[name]
105
106
107 def _string_setting(settings, name, default, lower=True, default_when_empty=False):
108 value = settings.get(name, default)
109
110 if default_when_empty and not value:
111 # use default value when value is empty
112 value = default
113
114 if lower:
115 value = value.lower()
116 settings[name] = value
117 return settings[name]
118
119
120 107 def log_max_fd():
121 108 try:
122 109 maxfd = psutil.Process().rlimit(psutil.RLIMIT_NOFILE)[1]
@@ -125,7 +112,7 b' def log_max_fd():'
125 112 pass
126 113
127 114
128 class VCS(object):
115 class VCS:
129 116 def __init__(self, locale_conf=None, cache_config=None):
130 117 self.locale = locale_conf
131 118 self.cache_config = cache_config
@@ -137,13 +124,13 b' class VCS(object):'
137 124 git_factory = GitFactory()
138 125 self._git_remote = GitRemote(git_factory)
139 126 else:
140 log.info("Git client import failed")
127 log.error("Git client import failed: %s", git_import_err)
141 128
142 129 if MercurialFactory and HgRemote:
143 130 hg_factory = MercurialFactory()
144 131 self._hg_remote = HgRemote(hg_factory)
145 132 else:
146 log.info("Mercurial client import failed")
133 log.error("Mercurial client import failed: %s", hg_import_err)
147 134
148 135 if SubversionFactory and SvnRemote:
149 136 svn_factory = SubversionFactory()
@@ -152,7 +139,7 b' class VCS(object):'
152 139 hg_factory = MercurialFactory()
153 140 self._svn_remote = SvnRemote(svn_factory, hg_factory=hg_factory)
154 141 else:
155 log.info("Subversion client import failed")
142 log.error("Subversion client import failed: %s", svn_import_err)
156 143
157 144 self._vcsserver = VcsServer()
158 145
@@ -160,8 +147,7 b' class VCS(object):'
160 147 if self.locale:
161 148 log.info('Settings locale: `LC_ALL` to %s', self.locale)
162 149 else:
163 log.info(
164 'Configuring locale subsystem based on environment variables')
150 log.info('Configuring locale subsystem based on environment variables')
165 151 try:
166 152 # If self.locale is the empty string, then the locale
167 153 # module will use the environment variables. See the
@@ -173,11 +159,10 b' class VCS(object):'
173 159 'Locale set to language code "%s" with encoding "%s".',
174 160 language_code, encoding)
175 161 except locale.Error:
176 log.exception(
177 'Cannot set locale, not configuring the locale system')
162 log.exception('Cannot set locale, not configuring the locale system')
178 163
179 164
180 class WsgiProxy(object):
165 class WsgiProxy:
181 166 def __init__(self, wsgi):
182 167 self.wsgi = wsgi
183 168
@@ -215,12 +200,12 b' def not_found(request):'
215 200 return {'status': '404 NOT FOUND'}
216 201
217 202
218 class VCSViewPredicate(object):
203 class VCSViewPredicate:
219 204 def __init__(self, val, config):
220 205 self.remotes = val
221 206
222 207 def text(self):
223 return 'vcs view method = %s' % (self.remotes.keys(),)
208 return f'vcs view method = {list(self.remotes.keys())}'
224 209
225 210 phash = text
226 211
@@ -233,18 +218,22 b' class VCSViewPredicate(object):'
233 218 return backend in self.remotes
234 219
235 220
236 class HTTPApplication(object):
221 class HTTPApplication:
237 222 ALLOWED_EXCEPTIONS = ('KeyError', 'URLError')
238 223
239 224 remote_wsgi = remote_wsgi
240 225 _use_echo_app = False
241 226
242 227 def __init__(self, settings=None, global_config=None):
243 self._sanitize_settings_and_apply_defaults(settings)
244 228
245 229 self.config = Configurator(settings=settings)
230 # Init our statsd at very start
231 self.config.registry.statsd = StatsdClient.statsd
232 self.config.registry.vcs_call_context = {}
233
246 234 self.global_config = global_config
247 235 self.config.include('vcsserver.lib.rc_cache')
236 self.config.include('vcsserver.lib.rc_cache.archive_cache')
248 237
249 238 settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
250 239 vcs = VCS(locale_conf=settings_locale, cache_config=settings)
@@ -281,40 +270,6 b' class HTTPApplication(object):'
281 270 vcsserver.PYRAMID_SETTINGS = settings_merged
282 271 vcsserver.CONFIG = settings_merged
283 272
284 def _sanitize_settings_and_apply_defaults(self, settings):
285 temp_store = tempfile.gettempdir()
286 default_cache_dir = os.path.join(temp_store, 'rc_cache')
287
288 # save default, cache dir, and use it for all backends later.
289 default_cache_dir = _string_setting(
290 settings,
291 'cache_dir',
292 default_cache_dir, lower=False, default_when_empty=True)
293
294 # ensure we have our dir created
295 if not os.path.isdir(default_cache_dir):
296 os.makedirs(default_cache_dir, mode=0o755)
297
298 # exception store cache
299 _string_setting(
300 settings,
301 'exception_tracker.store_path',
302 temp_store, lower=False, default_when_empty=True)
303
304 # repo_object cache
305 _string_setting(
306 settings,
307 'rc_cache.repo_object.backend',
308 'dogpile.cache.rc.file_namespace', lower=False)
309 _int_setting(
310 settings,
311 'rc_cache.repo_object.expiration_time',
312 30 * 24 * 60 * 60)
313 _string_setting(
314 settings,
315 'rc_cache.repo_object.arguments.filename',
316 os.path.join(default_cache_dir, 'vcsserver_cache_1'), lower=False)
317
318 273 def _configure(self):
319 274 self.config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory)
320 275
@@ -359,16 +314,13 b' class HTTPApplication(object):'
359 314 'vcsserver.lib.request_counter.get_request_counter',
360 315 'request_count')
361 316
362 self.config.add_request_method(
363 'vcsserver.lib._vendor.statsd.get_statsd_client',
364 'statsd', reify=True)
365
366 317 def wsgi_app(self):
367 318 return self.config.make_wsgi_app()
368 319
369 320 def _vcs_view_params(self, request):
370 321 remote = self._remotes[request.matchdict['backend']]
371 322 payload = msgpack.unpackb(request.body, use_list=True)
323
372 324 method = payload.get('method')
373 325 params = payload['params']
374 326 wire = params.get('wire')
@@ -376,6 +328,11 b' class HTTPApplication(object):'
376 328 kwargs = params.get('kwargs')
377 329 context_uid = None
378 330
331 request.registry.vcs_call_context = {
332 'method': method,
333 'repo_name': payload.get('_repo_name'),
334 }
335
379 336 if wire:
380 337 try:
381 338 wire['context'] = context_uid = uuid.UUID(wire['context'])
@@ -386,17 +343,34 b' class HTTPApplication(object):'
386 343
387 344 # NOTE(marcink): trading complexity for slight performance
388 345 if log.isEnabledFor(logging.DEBUG):
389 no_args_methods = [
390
391 ]
392 if method in no_args_methods:
346 # also we SKIP printing out any of those methods args since they maybe excessive
347 just_args_methods = {
348 'commitctx': ('content', 'removed', 'updated'),
349 'commit': ('content', 'removed', 'updated')
350 }
351 if method in just_args_methods:
352 skip_args = just_args_methods[method]
393 353 call_args = ''
354 call_kwargs = {}
355 for k in kwargs:
356 if k in skip_args:
357 # replace our skip key with dummy
358 call_kwargs[k] = f'RemovedParam({k})'
359 else:
360 call_kwargs[k] = kwargs[k]
394 361 else:
395 362 call_args = args[1:]
363 call_kwargs = kwargs
396 364
397 365 log.debug('Method requested:`%s` with args:%s kwargs:%s context_uid: %s, repo_state_uid:%s',
398 method, call_args, kwargs, context_uid, repo_state_uid)
366 method, call_args, call_kwargs, context_uid, repo_state_uid)
399 367
368 statsd = request.registry.statsd
369 if statsd:
370 statsd.incr(
371 'vcsserver_method_total', tags=[
372 f"method:{method}",
373 ])
400 374 return payload, remote, method, args, kwargs
401 375
402 376 def vcs_view(self, request):
@@ -435,8 +409,7 b' class HTTPApplication(object):'
435 409 if should_store_exc:
436 410 store_exception(id(exc_info), exc_info, request_path=request.path)
437 411
438 tb_info = ''.join(
439 traceback.format_exception(exc_type, exc_value, exc_traceback))
412 tb_info = format_exc(exc_info)
440 413
441 414 type_ = e.__class__.__name__
442 415 if type_ not in self.ALLOWED_EXCEPTIONS:
@@ -445,7 +418,7 b' class HTTPApplication(object):'
445 418 resp = {
446 419 'id': payload_id,
447 420 'error': {
448 'message': e.message,
421 'message': str(e),
449 422 'traceback': tb_info,
450 423 'org_exc': org_exc_name,
451 424 'org_exc_tb': org_exc_tb,
@@ -462,7 +435,7 b' class HTTPApplication(object):'
462 435 'id': payload_id,
463 436 'result': resp
464 437 }
465
438 log.debug('Serving data for method %s', method)
466 439 return resp
467 440
468 441 def vcs_stream_view(self, request):
@@ -471,13 +444,10 b' class HTTPApplication(object):'
471 444 method = method.split('stream:')[-1]
472 445 chunk_size = safe_int(payload.get('chunk_size')) or 4096
473 446
474 try:
475 resp = getattr(remote, method)(*args, **kwargs)
476 except Exception as e:
477 raise
447 resp = getattr(remote, method)(*args, **kwargs)
478 448
479 449 def get_chunked_data(method_resp):
480 stream = StringIO(method_resp)
450 stream = io.BytesIO(method_resp)
481 451 while 1:
482 452 chunk = stream.read(chunk_size)
483 453 if not chunk:
@@ -491,8 +461,14 b' class HTTPApplication(object):'
491 461
492 462 def status_view(self, request):
493 463 import vcsserver
494 return {'status': 'OK', 'vcsserver_version': vcsserver.__version__,
495 'pid': os.getpid()}
464 _platform_id = platform.uname()[1] or 'instance'
465
466 return {
467 "status": "OK",
468 "vcsserver_version": vcsserver.get_version(),
469 "platform": _platform_id,
470 "pid": os.getpid(),
471 }
496 472
497 473 def service_view(self, request):
498 474 import vcsserver
@@ -514,12 +490,12 b' class HTTPApplication(object):'
514 490 except Exception:
515 491 log.exception('Failed to read .ini file for display')
516 492
517 environ = os.environ.items()
493 environ = list(os.environ.items())
518 494
519 495 resp = {
520 496 'id': payload.get('id'),
521 497 'result': dict(
522 version=vcsserver.__version__,
498 version=vcsserver.get_version(),
523 499 config=server_config,
524 500 app_config=app_config,
525 501 environ=environ,
@@ -529,14 +505,28 b' class HTTPApplication(object):'
529 505 return resp
530 506
531 507 def _msgpack_renderer_factory(self, info):
508
532 509 def _render(value, system):
510 bin_type = False
511 res = value.get('result')
512 if isinstance(res, BytesEnvelope):
513 log.debug('Result is wrapped in BytesEnvelope type')
514 bin_type = True
515 elif isinstance(res, BinaryEnvelope):
516 log.debug('Result is wrapped in BinaryEnvelope type')
517 value['result'] = res.val
518 bin_type = True
519
533 520 request = system.get('request')
534 521 if request is not None:
535 522 response = request.response
536 523 ct = response.content_type
537 524 if ct == response.default_content_type:
538 525 response.content_type = 'application/x-msgpack'
539 return msgpack.packb(value)
526 if bin_type:
527 response.content_type = 'application/x-msgpack-bin'
528
529 return msgpack.packb(value, use_bin_type=bin_type)
540 530 return _render
541 531
542 532 def set_env_from_config(self, environ, config):
@@ -589,16 +579,17 b' class HTTPApplication(object):'
589 579 @wsgiapp
590 580 def _hg_stream(environ, start_response):
591 581 log.debug('http-app: handling hg stream')
592 repo_path = environ['HTTP_X_RC_REPO_PATH']
593 repo_name = environ['HTTP_X_RC_REPO_NAME']
594 packed_config = base64.b64decode(
595 environ['HTTP_X_RC_REPO_CONFIG'])
596 config = msgpack.unpackb(packed_config)
582 call_context = get_headers_call_context(environ)
583
584 repo_path = call_context['repo_path']
585 repo_name = call_context['repo_name']
586 config = call_context['repo_config']
587
597 588 app = scm_app.create_hg_wsgi_app(
598 589 repo_path, repo_name, config)
599 590
600 591 # Consistent path information for hgweb
601 environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO']
592 environ['PATH_INFO'] = call_context['path_info']
602 593 environ['REPO_NAME'] = repo_name
603 594 self.set_env_from_config(environ, config)
604 595
@@ -618,13 +609,14 b' class HTTPApplication(object):'
618 609 @wsgiapp
619 610 def _git_stream(environ, start_response):
620 611 log.debug('http-app: handling git stream')
621 repo_path = environ['HTTP_X_RC_REPO_PATH']
622 repo_name = environ['HTTP_X_RC_REPO_NAME']
623 packed_config = base64.b64decode(
624 environ['HTTP_X_RC_REPO_CONFIG'])
625 config = msgpack.unpackb(packed_config)
612
613 call_context = get_headers_call_context(environ)
626 614
627 environ['PATH_INFO'] = environ['HTTP_X_RC_PATH_INFO']
615 repo_path = call_context['repo_path']
616 repo_name = call_context['repo_name']
617 config = call_context['repo_config']
618
619 environ['PATH_INFO'] = call_context['path_info']
628 620 self.set_env_from_config(environ, config)
629 621
630 622 content_type = environ.get('CONTENT_TYPE', '')
@@ -660,31 +652,39 b' class HTTPApplication(object):'
660 652
661 653 def handle_vcs_exception(self, exception, request):
662 654 _vcs_kind = getattr(exception, '_vcs_kind', '')
655
663 656 if _vcs_kind == 'repo_locked':
664 # Get custom repo-locked status code if present.
665 status_code = request.headers.get('X-RC-Locked-Status-Code')
657 headers_call_context = get_headers_call_context(request.environ)
658 status_code = safe_int(headers_call_context['locked_status_code'])
659
666 660 return HTTPRepoLocked(
667 title=exception.message, status_code=status_code)
661 title=str(exception), status_code=status_code, headers=[('X-Rc-Locked', '1')])
668 662
669 663 elif _vcs_kind == 'repo_branch_protected':
670 664 # Get custom repo-branch-protected status code if present.
671 return HTTPRepoBranchProtected(title=exception.message)
665 return HTTPRepoBranchProtected(
666 title=str(exception), headers=[('X-Rc-Branch-Protection', '1')])
672 667
673 668 exc_info = request.exc_info
674 669 store_exception(id(exc_info), exc_info)
675 670
676 671 traceback_info = 'unavailable'
677 672 if request.exc_info:
678 exc_type, exc_value, exc_tb = request.exc_info
679 traceback_info = ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))
673 traceback_info = format_exc(request.exc_info)
680 674
681 675 log.error(
682 'error occurred handling this request for path: %s, \n tb: %s',
676 'error occurred handling this request for path: %s, \n%s',
683 677 request.path, traceback_info)
678
679 statsd = request.registry.statsd
680 if statsd:
681 exc_type = f"{exception.__class__.__module__}.{exception.__class__.__name__}"
682 statsd.incr('vcsserver_exception_total',
683 tags=[f"type:{exc_type}"])
684 684 raise exception
685 685
686 686
687 class ResponseFilter(object):
687 class ResponseFilter:
688 688
689 689 def __init__(self, start_response):
690 690 self._start_response = start_response
@@ -696,10 +696,80 b' class ResponseFilter(object):'
696 696 return self._start_response(status, headers, exc_info)
697 697
698 698
699 def sanitize_settings_and_apply_defaults(global_config, settings):
700 _global_settings_maker = SettingsMaker(global_config)
701 settings_maker = SettingsMaker(settings)
702
703 settings_maker.make_setting('logging.autoconfigure', False, parser='bool')
704
705 logging_conf = os.path.join(os.path.dirname(global_config.get('__file__')), 'logging.ini')
706 settings_maker.enable_logging(logging_conf)
707
708 # Default includes, possible to change as a user
709 pyramid_includes = settings_maker.make_setting('pyramid.includes', [], parser='list:newline')
710 log.debug("Using the following pyramid.includes: %s", pyramid_includes)
711
712 settings_maker.make_setting('__file__', global_config.get('__file__'))
713
714 settings_maker.make_setting('pyramid.default_locale_name', 'en')
715 settings_maker.make_setting('locale', 'en_US.UTF-8')
716
717 settings_maker.make_setting('core.binary_dir', '')
718
719 temp_store = tempfile.gettempdir()
720 default_cache_dir = os.path.join(temp_store, 'rc_cache')
721 # save default, cache dir, and use it for all backends later.
722 default_cache_dir = settings_maker.make_setting(
723 'cache_dir',
724 default=default_cache_dir, default_when_empty=True,
725 parser='dir:ensured')
726
727 # exception store cache
728 settings_maker.make_setting(
729 'exception_tracker.store_path',
730 default=os.path.join(default_cache_dir, 'exc_store'), default_when_empty=True,
731 parser='dir:ensured'
732 )
733
734 # repo_object cache defaults
735 settings_maker.make_setting(
736 'rc_cache.repo_object.backend',
737 default='dogpile.cache.rc.file_namespace',
738 parser='string')
739 settings_maker.make_setting(
740 'rc_cache.repo_object.expiration_time',
741 default=30 * 24 * 60 * 60, # 30days
742 parser='int')
743 settings_maker.make_setting(
744 'rc_cache.repo_object.arguments.filename',
745 default=os.path.join(default_cache_dir, 'vcsserver_cache_repo_object.db'),
746 parser='string')
747
748 # statsd
749 settings_maker.make_setting('statsd.enabled', False, parser='bool')
750 settings_maker.make_setting('statsd.statsd_host', 'statsd-exporter', parser='string')
751 settings_maker.make_setting('statsd.statsd_port', 9125, parser='int')
752 settings_maker.make_setting('statsd.statsd_prefix', '')
753 settings_maker.make_setting('statsd.statsd_ipv6', False, parser='bool')
754
755 settings_maker.env_expand()
756
757
699 758 def main(global_config, **settings):
759 start_time = time.time()
760 log.info('Pyramid app config starting')
761
700 762 if MercurialFactory:
701 763 hgpatches.patch_largefiles_capabilities()
702 764 hgpatches.patch_subrepo_type_mapping()
703 765
704 app = HTTPApplication(settings=settings, global_config=global_config)
705 return app.wsgi_app()
766 # Fill in and sanitize the defaults & do ENV expansion
767 sanitize_settings_and_apply_defaults(global_config, settings)
768
769 # init and bootstrap StatsdClient
770 StatsdClient.setup(settings)
771
772 pyramid_app = HTTPApplication(settings=settings, global_config=global_config).wsgi_app()
773 total_time = time.time() - start_time
774 log.info('Pyramid app created and configured in %.2fs', total_time)
775 return pyramid_app
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -1,13 +1,14 b''
1 import sys
1
2 2 import threading
3 3 import weakref
4 4 from base64 import b64encode
5 5 from logging import getLogger
6 6 from os import urandom
7 from typing import Union
7 8
8 9 from redis import StrictRedis
9 10
10 __version__ = '3.7.0'
11 __version__ = '4.0.0'
11 12
12 13 loggers = {
13 14 k: getLogger("vcsserver." + ".".join((__name__, k)))
@@ -23,14 +24,8 b' loggers = {'
23 24 ]
24 25 }
25 26
26 PY3 = sys.version_info[0] == 3
27
28 if PY3:
29 text_type = str
30 binary_type = bytes
31 else:
32 text_type = unicode # noqa
33 binary_type = str
27 text_type = str
28 binary_type = bytes
34 29
35 30
36 31 # Check if the id match. If not, return an error code.
@@ -107,15 +102,19 b' class NotExpirable(RuntimeError):'
107 102 pass
108 103
109 104
110 class Lock(object):
105 class Lock:
111 106 """
112 107 A Lock context manager implemented via redis SETNX/BLPOP.
113 108 """
109
114 110 unlock_script = None
115 111 extend_script = None
116 112 reset_script = None
117 113 reset_all_script = None
118 114
115 _lock_renewal_interval: float
116 _lock_renewal_thread: Union[threading.Thread, None]
117
119 118 def __init__(self, redis_client, name, expire=None, id=None, auto_renewal=False, strict=True, signal_expire=1000):
120 119 """
121 120 :param redis_client:
@@ -172,7 +171,7 b' class Lock(object):'
172 171 elif isinstance(id, text_type):
173 172 self._id = id
174 173 else:
175 raise TypeError("Incorrect type for `id`. Must be bytes/str not %s." % type(id))
174 raise TypeError(f"Incorrect type for `id`. Must be bytes/str not {type(id)}.")
176 175 self._name = 'lock:' + name
177 176 self._signal = 'lock-signal:' + name
178 177 self._lock_renewal_interval = (float(expire) * 2 / 3
@@ -186,11 +185,11 b' class Lock(object):'
186 185 def register_scripts(cls, redis_client):
187 186 global reset_all_script
188 187 if reset_all_script is None:
189 reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
190 188 cls.unlock_script = redis_client.register_script(UNLOCK_SCRIPT)
191 189 cls.extend_script = redis_client.register_script(EXTEND_SCRIPT)
192 190 cls.reset_script = redis_client.register_script(RESET_SCRIPT)
193 191 cls.reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
192 reset_all_script = redis_client.register_script(RESET_ALL_SCRIPT)
194 193
195 194 @property
196 195 def _held(self):
@@ -221,7 +220,7 b' class Lock(object):'
221 220 """
222 221 logger = loggers["acquire"]
223 222
224 logger.debug("Getting acquire on %r ...", self._name)
223 logger.debug("Getting blocking: %s acquire on %r ...", blocking, self._name)
225 224
226 225 if self._held:
227 226 owner_id = self.get_owner_id()
@@ -233,10 +232,10 b' class Lock(object):'
233 232 if timeout:
234 233 timeout = int(timeout)
235 234 if timeout < 0:
236 raise InvalidTimeout("Timeout (%d) cannot be less than or equal to 0" % timeout)
235 raise InvalidTimeout(f"Timeout ({timeout}) cannot be less than or equal to 0")
237 236
238 237 if self._expire and not self._lock_renewal_interval and timeout > self._expire:
239 raise TimeoutTooLarge("Timeout (%d) cannot be greater than expire (%d)" % (timeout, self._expire))
238 raise TimeoutTooLarge(f"Timeout ({timeout}) cannot be greater than expire ({self._expire})")
240 239
241 240 busy = True
242 241 blpop_timeout = timeout or self._expire or 0
@@ -249,16 +248,17 b' class Lock(object):'
249 248 elif blocking:
250 249 timed_out = not self._client.blpop(self._signal, blpop_timeout) and timeout
251 250 else:
252 logger.warning("Failed to get %r.", self._name)
251 logger.warning("Failed to acquire Lock(%r).", self._name)
253 252 return False
254 253
255 logger.info("Got lock for %r.", self._name)
254 logger.debug("Acquired Lock(%r).", self._name)
256 255 if self._lock_renewal_interval is not None:
257 256 self._start_lock_renewer()
258 257 return True
259 258
260 259 def extend(self, expire=None):
261 """Extends expiration time of the lock.
260 """
261 Extends expiration time of the lock.
262 262
263 263 :param expire:
264 264 New expiration time. If ``None`` - `expire` provided during
@@ -278,29 +278,29 b' class Lock(object):'
278 278
279 279 error = self.extend_script(client=self._client, keys=(self._name, self._signal), args=(self._id, expire))
280 280 if error == 1:
281 raise NotAcquired("Lock %s is not acquired or it already expired." % self._name)
281 raise NotAcquired(f"Lock {self._name} is not acquired or it already expired.")
282 282 elif error == 2:
283 raise NotExpirable("Lock %s has no assigned expiration time" % self._name)
283 raise NotExpirable(f"Lock {self._name} has no assigned expiration time")
284 284 elif error:
285 raise RuntimeError("Unsupported error code %s from EXTEND script" % error)
285 raise RuntimeError(f"Unsupported error code {error} from EXTEND script")
286 286
287 287 @staticmethod
288 def _lock_renewer(lockref, interval, stop):
288 def _lock_renewer(name, lockref, interval, stop):
289 289 """
290 290 Renew the lock key in redis every `interval` seconds for as long
291 291 as `self._lock_renewal_thread.should_exit` is False.
292 292 """
293 293 while not stop.wait(timeout=interval):
294 loggers["refresh.thread.start"].debug("Refreshing lock")
295 lock = lockref()
294 loggers["refresh.thread.start"].debug("Refreshing Lock(%r).", name)
295 lock: "Lock" = lockref()
296 296 if lock is None:
297 297 loggers["refresh.thread.stop"].debug(
298 "The lock no longer exists, stopping lock refreshing"
298 "Stopping loop because Lock(%r) was garbage collected.", name
299 299 )
300 300 break
301 301 lock.extend(expire=lock._expire)
302 302 del lock
303 loggers["refresh.thread.exit"].debug("Exit requested, stopping lock refreshing")
303 loggers["refresh.thread.exit"].debug("Exiting renewal thread for Lock(%r).", name)
304 304
305 305 def _start_lock_renewer(self):
306 306 """
@@ -310,18 +310,21 b' class Lock(object):'
310 310 raise AlreadyStarted("Lock refresh thread already started")
311 311
312 312 loggers["refresh.start"].debug(
313 "Starting thread to refresh lock every %s seconds",
314 self._lock_renewal_interval
313 "Starting renewal thread for Lock(%r). Refresh interval: %s seconds.",
314 self._name, self._lock_renewal_interval
315 315 )
316 316 self._lock_renewal_stop = threading.Event()
317 317 self._lock_renewal_thread = threading.Thread(
318 318 group=None,
319 319 target=self._lock_renewer,
320 kwargs={'lockref': weakref.ref(self),
321 'interval': self._lock_renewal_interval,
322 'stop': self._lock_renewal_stop}
320 kwargs={
321 'name': self._name,
322 'lockref': weakref.ref(self),
323 'interval': self._lock_renewal_interval,
324 'stop': self._lock_renewal_stop,
325 },
323 326 )
324 self._lock_renewal_thread.setDaemon(True)
327 self._lock_renewal_thread.daemon = True
325 328 self._lock_renewal_thread.start()
326 329
327 330 def _stop_lock_renewer(self):
@@ -332,15 +335,16 b' class Lock(object):'
332 335 """
333 336 if self._lock_renewal_thread is None or not self._lock_renewal_thread.is_alive():
334 337 return
335 loggers["refresh.shutdown"].debug("Signalling the lock refresher to stop")
338 loggers["refresh.shutdown"].debug("Signaling renewal thread for Lock(%r) to exit.", self._name)
336 339 self._lock_renewal_stop.set()
337 340 self._lock_renewal_thread.join()
338 341 self._lock_renewal_thread = None
339 loggers["refresh.exit"].debug("Lock refresher has stopped")
342 loggers["refresh.exit"].debug("Renewal thread for Lock(%r) exited.", self._name)
340 343
341 344 def __enter__(self):
342 345 acquired = self.acquire(blocking=True)
343 assert acquired, "Lock wasn't acquired, but blocking=True"
346 if not acquired:
347 raise AssertionError(f"Lock({self._name}) wasn't acquired, but blocking=True was used!")
344 348 return self
345 349
346 350 def __exit__(self, exc_type=None, exc_value=None, traceback=None):
@@ -358,12 +362,12 b' class Lock(object):'
358 362 """
359 363 if self._lock_renewal_thread is not None:
360 364 self._stop_lock_renewer()
361 loggers["release"].debug("Releasing %r.", self._name)
365 loggers["release"].debug("Releasing Lock(%r).", self._name)
362 366 error = self.unlock_script(client=self._client, keys=(self._name, self._signal), args=(self._id, self._signal_expire))
363 367 if error == 1:
364 raise NotAcquired("Lock %s is not acquired or it already expired." % self._name)
368 raise NotAcquired(f"Lock({self._name}) is not acquired or it already expired.")
365 369 elif error:
366 raise RuntimeError("Unsupported error code %s from EXTEND script." % error)
370 raise RuntimeError(f"Unsupported error code {error} from EXTEND script.")
367 371
368 372 def locked(self):
369 373 """
@@ -1,5 +1,3 b''
1 from __future__ import absolute_import, division, unicode_literals
2
3 1 import logging
4 2
5 3 from .stream import TCPStatsClient, UnixSocketStatsClient # noqa
@@ -38,8 +36,14 b' def client_from_config(configuration, pr'
38 36 ipv6 = asbool(_config.pop('statsd_ipv6', IPV6))
39 37 log.debug('configured statsd client %s:%s', host, port)
40 38
41 return StatsClient(
42 host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6)
39 try:
40 client = StatsClient(
41 host=host, port=port, prefix=prefix, maxudpsize=maxudpsize, ipv6=ipv6)
42 except Exception:
43 log.exception('StatsD is enabled, but failed to connect to statsd server, fallback: disable statsd')
44 client = None
45
46 return client
43 47
44 48
45 49 def get_statsd_client(request):
@@ -1,13 +1,35 b''
1 from __future__ import absolute_import, division, unicode_literals
2
1 import re
3 2 import random
4 3 from collections import deque
5 4 from datetime import timedelta
5 from repoze.lru import lru_cache
6 6
7 7 from .timer import Timer
8 8
9 TAG_INVALID_CHARS_RE = re.compile(
10 r"[^\w\d_\-:/\.]",
11 #re.UNICODE
12 )
13 TAG_INVALID_CHARS_SUBS = "_"
9 14
10 class StatsClientBase(object):
15 # we save and expose methods called by statsd for discovery
16 buckets_dict = {
17
18 }
19
20
21 @lru_cache(maxsize=500)
22 def _normalize_tags_with_cache(tag_list):
23 return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list]
24
25
26 def normalize_tags(tag_list):
27 # We have to turn our input tag list into a non-mutable tuple for it to
28 # be hashable (and thus usable) by the @lru_cache decorator.
29 return _normalize_tags_with_cache(tuple(tag_list))
30
31
32 class StatsClientBase:
11 33 """A Base class for various statsd clients."""
12 34
13 35 def close(self):
@@ -20,10 +42,19 b' class StatsClientBase(object):'
20 42 def pipeline(self):
21 43 raise NotImplementedError()
22 44
23 def timer(self, stat, rate=1):
24 return Timer(self, stat, rate)
45 def timer(self, stat, rate=1, tags=None, auto_send=True):
46 """
47 statsd = StatsdClient.statsd
48 with statsd.timer('bucket_name', auto_send=True) as tmr:
49 # This block will be timed.
50 for i in range(0, 100000):
51 i ** 2
52 # you can access time here...
53 elapsed_ms = tmr.ms
54 """
55 return Timer(self, stat, rate, tags, auto_send=auto_send)
25 56
26 def timing(self, stat, delta, rate=1):
57 def timing(self, stat, delta, rate=1, tags=None, use_decimals=True):
27 58 """
28 59 Send new timing information.
29 60
@@ -32,17 +63,21 b' class StatsClientBase(object):'
32 63 if isinstance(delta, timedelta):
33 64 # Convert timedelta to number of milliseconds.
34 65 delta = delta.total_seconds() * 1000.
35 self._send_stat(stat, '%0.6f|ms' % delta, rate)
36
37 def incr(self, stat, count=1, rate=1):
38 """Increment a stat by `count`."""
39 self._send_stat(stat, '%s|c' % count, rate)
66 if use_decimals:
67 fmt = '%0.6f|ms'
68 else:
69 fmt = '%s|ms'
70 self._send_stat(stat, fmt % delta, rate, tags)
40 71
41 def decr(self, stat, count=1, rate=1):
72 def incr(self, stat, count=1, rate=1, tags=None):
73 """Increment a stat by `count`."""
74 self._send_stat(stat, f'{count}|c', rate, tags)
75
76 def decr(self, stat, count=1, rate=1, tags=None):
42 77 """Decrement a stat by `count`."""
43 self.incr(stat, -count, rate)
78 self.incr(stat, -count, rate, tags)
44 79
45 def gauge(self, stat, value, rate=1, delta=False):
80 def gauge(self, stat, value, rate=1, delta=False, tags=None):
46 81 """Set a gauge value."""
47 82 if value < 0 and not delta:
48 83 if rate < 1:
@@ -50,28 +85,40 b' class StatsClientBase(object):'
50 85 return
51 86 with self.pipeline() as pipe:
52 87 pipe._send_stat(stat, '0|g', 1)
53 pipe._send_stat(stat, '%s|g' % value, 1)
88 pipe._send_stat(stat, f'{value}|g', 1)
54 89 else:
55 90 prefix = '+' if delta and value >= 0 else ''
56 self._send_stat(stat, '%s%s|g' % (prefix, value), rate)
91 self._send_stat(stat, f'{prefix}{value}|g', rate, tags)
57 92
58 93 def set(self, stat, value, rate=1):
59 94 """Set a set value."""
60 self._send_stat(stat, '%s|s' % value, rate)
95 self._send_stat(stat, f'{value}|s', rate)
96
97 def histogram(self, stat, value, rate=1, tags=None):
98 """Set a histogram"""
99 self._send_stat(stat, f'{value}|h', rate, tags)
61 100
62 def _send_stat(self, stat, value, rate):
63 self._after(self._prepare(stat, value, rate))
101 def _send_stat(self, stat, value, rate, tags=None):
102 self._after(self._prepare(stat, value, rate, tags))
64 103
65 def _prepare(self, stat, value, rate):
104 def _prepare(self, stat, value, rate, tags=None):
105 global buckets_dict
106 buckets_dict[stat] = 1
107
66 108 if rate < 1:
67 109 if random.random() > rate:
68 110 return
69 value = '%s|@%s' % (value, rate)
111 value = f'{value}|@{rate}'
70 112
71 113 if self._prefix:
72 stat = '%s.%s' % (self._prefix, stat)
114 stat = f'{self._prefix}.{stat}'
73 115
74 return '%s:%s' % (stat, value)
116 res = '%s:%s%s' % (
117 stat,
118 value,
119 ("|#" + ",".join(normalize_tags(tags))) if tags else "",
120 )
121 return res
75 122
76 123 def _after(self, data):
77 124 if data:
@@ -1,5 +1,3 b''
1 from __future__ import absolute_import, division, unicode_literals
2
3 1 import socket
4 2
5 3 from .base import StatsClientBase, PipelineBase
@@ -1,14 +1,5 b''
1 from __future__ import absolute_import, division, unicode_literals
2
3 1 import functools
4
5 # Use timer that's not susceptible to time of day adjustments.
6 try:
7 # perf_counter is only present on Py3.3+
8 from time import perf_counter as time_now
9 except ImportError:
10 # fall back to using time
11 from time import time as time_now
2 from time import perf_counter as time_now
12 3
13 4
14 5 def safe_wraps(wrapper, *args, **kwargs):
@@ -18,16 +9,19 b' def safe_wraps(wrapper, *args, **kwargs)'
18 9 return functools.wraps(wrapper, *args, **kwargs)
19 10
20 11
21 class Timer(object):
12 class Timer:
22 13 """A context manager/decorator for statsd.timing()."""
23 14
24 def __init__(self, client, stat, rate=1):
15 def __init__(self, client, stat, rate=1, tags=None, use_decimals=True, auto_send=True):
25 16 self.client = client
26 17 self.stat = stat
27 18 self.rate = rate
19 self.tags = tags
28 20 self.ms = None
29 21 self._sent = False
30 22 self._start_time = None
23 self.use_decimals = use_decimals
24 self.auto_send = auto_send
31 25
32 26 def __call__(self, f):
33 27 """Thread-safe timing function decorator."""
@@ -38,14 +32,15 b' class Timer(object):'
38 32 return f(*args, **kwargs)
39 33 finally:
40 34 elapsed_time_ms = 1000.0 * (time_now() - start_time)
41 self.client.timing(self.stat, elapsed_time_ms, self.rate)
35 self.client.timing(self.stat, elapsed_time_ms, self.rate, self.tags, self.use_decimals)
36 self._sent = True
42 37 return _wrapped
43 38
44 39 def __enter__(self):
45 40 return self.start()
46 41
47 42 def __exit__(self, typ, value, tb):
48 self.stop()
43 self.stop(send=self.auto_send)
49 44
50 45 def start(self):
51 46 self.ms = None
@@ -68,4 +63,4 b' class Timer(object):'
68 63 if self._sent:
69 64 raise RuntimeError('Already sent data.')
70 65 self._sent = True
71 self.client.timing(self.stat, self.ms, self.rate)
66 self.client.timing(self.stat, self.ms, self.rate, self.tags, self.use_decimals)
@@ -1,5 +1,3 b''
1 from __future__ import absolute_import, division, unicode_literals
2
3 1 import socket
4 2
5 3 from .base import StatsClientBase, PipelineBase
@@ -8,7 +6,7 b' from .base import StatsClientBase, Pipel'
8 6 class Pipeline(PipelineBase):
9 7
10 8 def __init__(self, client):
11 super(Pipeline, self).__init__(client)
9 super().__init__(client)
12 10 self._maxudpsize = client._maxudpsize
13 11
14 12 def _send(self):
@@ -1,7 +1,5 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
@@ -17,16 +15,16 b''
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20
18 import io
21 19 import os
22 20 import time
21 import sys
23 22 import datetime
24 23 import msgpack
25 24 import logging
26 25 import traceback
27 26 import tempfile
28
29 from pyramid import compat
27 import glob
30 28
31 29 log = logging.getLogger(__name__)
32 30
@@ -35,16 +33,17 b" global_prefix = 'vcsserver'"
35 33 exc_store_dir_name = 'rc_exception_store_v1'
36 34
37 35
38 def exc_serialize(exc_id, tb, exc_type):
39
36 def exc_serialize(exc_id, tb, exc_type, extra_data=None):
40 37 data = {
41 'version': 'v1',
42 'exc_id': exc_id,
43 'exc_utc_date': datetime.datetime.utcnow().isoformat(),
44 'exc_timestamp': repr(time.time()),
45 'exc_message': tb,
46 'exc_type': exc_type,
38 "version": "v1",
39 "exc_id": exc_id,
40 "exc_utc_date": datetime.datetime.utcnow().isoformat(),
41 "exc_timestamp": repr(time.time()),
42 "exc_message": tb,
43 "exc_type": exc_type,
47 44 }
45 if extra_data:
46 data.update(extra_data)
48 47 return msgpack.packb(data), data
49 48
50 49
@@ -52,59 +51,153 b' def exc_unserialize(tb):'
52 51 return msgpack.unpackb(tb)
53 52
54 53
54 _exc_store = None
55
56
55 57 def get_exc_store():
56 58 """
57 59 Get and create exception store if it's not existing
58 60 """
61 global _exc_store
62
63 if _exc_store is not None:
64 # quick global cache
65 return _exc_store
66
59 67 import vcsserver as app
60 68
61 exc_store_dir = app.CONFIG.get('exception_tracker.store_path', '') or tempfile.gettempdir()
69 exc_store_dir = (
70 app.CONFIG.get("exception_tracker.store_path", "") or tempfile.gettempdir()
71 )
62 72 _exc_store_path = os.path.join(exc_store_dir, exc_store_dir_name)
63 73
64 74 _exc_store_path = os.path.abspath(_exc_store_path)
65 75 if not os.path.isdir(_exc_store_path):
66 76 os.makedirs(_exc_store_path)
67 log.debug('Initializing exceptions store at %s', _exc_store_path)
77 log.debug("Initializing exceptions store at %s", _exc_store_path)
78 _exc_store = _exc_store_path
79
68 80 return _exc_store_path
69 81
70 82
71 def _store_exception(exc_id, exc_info, prefix, request_path=''):
72 exc_type, exc_value, exc_traceback = exc_info
83 def get_detailed_tb(exc_info):
84 try:
85 from pip._vendor.rich import (
86 traceback as rich_tb,
87 scope as rich_scope,
88 console as rich_console,
89 )
90 except ImportError:
91 try:
92 from rich import (
93 traceback as rich_tb,
94 scope as rich_scope,
95 console as rich_console,
96 )
97 except ImportError:
98 return None
99
100 console = rich_console.Console(width=160, file=io.StringIO())
101
102 exc = rich_tb.Traceback.extract(*exc_info, show_locals=True)
103
104 tb_rich = rich_tb.Traceback(
105 trace=exc,
106 width=160,
107 extra_lines=3,
108 theme=None,
109 word_wrap=False,
110 show_locals=False,
111 max_frames=100,
112 )
73 113
74 tb = ''.join(traceback.format_exception(
75 exc_type, exc_value, exc_traceback, None))
114 # last_stack = exc.stacks[-1]
115 # last_frame = last_stack.frames[-1]
116 # if last_frame and last_frame.locals:
117 # console.print(
118 # rich_scope.render_scope(
119 # last_frame.locals,
120 # title=f'{last_frame.filename}:{last_frame.lineno}'))
121
122 console.print(tb_rich)
123 formatted_locals = console.file.getvalue()
124
125 return formatted_locals
126
76 127
77 detailed_tb = getattr(exc_value, '_org_exc_tb', None)
128 def get_request_metadata(request=None) -> dict:
129 request_metadata = {}
130 if not request:
131 from pyramid.threadlocal import get_current_request
132
133 request = get_current_request()
134
135 # NOTE(marcink): store request information into exc_data
136 if request:
137 request_metadata["client_address"] = getattr(request, "client_addr", "")
138 request_metadata["user_agent"] = getattr(request, "user_agent", "")
139 request_metadata["method"] = getattr(request, "method", "")
140 request_metadata["url"] = getattr(request, "url", "")
141 return request_metadata
142
143
144 def format_exc(exc_info, use_detailed_tb=True):
145 exc_type, exc_value, exc_traceback = exc_info
146 tb = "++ TRACEBACK ++\n\n"
147 tb += "".join(traceback.format_exception(exc_type, exc_value, exc_traceback, None))
148
149 detailed_tb = getattr(exc_value, "_org_exc_tb", None)
78 150
79 151 if detailed_tb:
80 if isinstance(detailed_tb, compat.string_types):
152 remote_tb = detailed_tb
153 if isinstance(detailed_tb, str):
81 154 remote_tb = [detailed_tb]
82 155
83 156 tb += (
84 '\n+++ BEG SOURCE EXCEPTION +++\n\n'
85 '{}\n'
86 '+++ END SOURCE EXCEPTION +++\n'
87 ''.format('\n'.join(remote_tb))
157 "\n+++ BEG SOURCE EXCEPTION +++\n\n"
158 "{}\n"
159 "+++ END SOURCE EXCEPTION +++\n"
160 "".format("\n".join(remote_tb))
88 161 )
89 162
90 163 # Avoid that remote_tb also appears in the frame
91 164 del remote_tb
92 165
166 if use_detailed_tb:
167 locals_tb = get_detailed_tb(exc_info)
168 if locals_tb:
169 tb += f"\n+++ DETAILS +++\n\n{locals_tb}\n" ""
170 return tb
171
172
173 def _store_exception(exc_id, exc_info, prefix, request_path=''):
174 """
175 Low level function to store exception in the exception tracker
176 """
177
178 extra_data = {}
179 extra_data.update(get_request_metadata())
180
181 exc_type, exc_value, exc_traceback = exc_info
182 tb = format_exc(exc_info)
183
93 184 exc_type_name = exc_type.__name__
185 exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name, extra_data=extra_data)
186
187 exc_pref_id = f"{exc_id}_{prefix}_{org_data['exc_timestamp']}"
94 188 exc_store_path = get_exc_store()
95 exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name)
96 exc_pref_id = '{}_{}_{}'.format(exc_id, prefix, org_data['exc_timestamp'])
97 189 if not os.path.isdir(exc_store_path):
98 190 os.makedirs(exc_store_path)
99 191 stored_exc_path = os.path.join(exc_store_path, exc_pref_id)
100 with open(stored_exc_path, 'wb') as f:
192 with open(stored_exc_path, "wb") as f:
101 193 f.write(exc_data)
102 log.debug('Stored generated exception %s as: %s', exc_id, stored_exc_path)
194 log.debug("Stored generated exception %s as: %s", exc_id, stored_exc_path)
103 195
104 log.error(
105 'error occurred handling this request.\n'
106 'Path: `%s`, tb: %s',
107 request_path, tb)
196 if request_path:
197 log.error(
198 'error occurred handling this request.\n'
199 'Path: `%s`, %s',
200 request_path, tb)
108 201
109 202
110 203 def store_exception(exc_id, exc_info, prefix=global_prefix, request_path=''):
@@ -116,10 +209,15 b' def store_exception(exc_id, exc_info, pr'
116 209 """
117 210
118 211 try:
119 _store_exception(exc_id=exc_id, exc_info=exc_info, prefix=prefix,
120 request_path=request_path)
212 exc_type = exc_info[0]
213 exc_type_name = exc_type.__name__
214
215 _store_exception(
216 exc_id=exc_id, exc_info=exc_info, prefix=prefix, request_path=request_path,
217 )
218 return exc_id, exc_type_name
121 219 except Exception:
122 log.exception('Failed to store exception `%s` information', exc_id)
220 log.exception("Failed to store exception `%s` information", exc_id)
123 221 # there's no way this can fail, it will crash server badly if it does.
124 222 pass
125 223
@@ -127,30 +225,26 b' def store_exception(exc_id, exc_info, pr'
127 225 def _find_exc_file(exc_id, prefix=global_prefix):
128 226 exc_store_path = get_exc_store()
129 227 if prefix:
130 exc_id = '{}_{}'.format(exc_id, prefix)
228 exc_id = f"{exc_id}_{prefix}"
131 229 else:
132 230 # search without a prefix
133 exc_id = '{}'.format(exc_id)
231 exc_id = f"{exc_id}"
134 232
135 # we need to search the store for such start pattern as above
136 for fname in os.listdir(exc_store_path):
137 if fname.startswith(exc_id):
138 exc_id = os.path.join(exc_store_path, fname)
139 break
140 continue
141 else:
142 exc_id = None
233 found_exc_id = None
234 matches = glob.glob(os.path.join(exc_store_path, exc_id) + "*")
235 if matches:
236 found_exc_id = matches[0]
143 237
144 return exc_id
238 return found_exc_id
145 239
146 240
147 241 def _read_exception(exc_id, prefix):
148 242 exc_id_file_path = _find_exc_file(exc_id=exc_id, prefix=prefix)
149 243 if exc_id_file_path:
150 with open(exc_id_file_path, 'rb') as f:
244 with open(exc_id_file_path, "rb") as f:
151 245 return exc_unserialize(f.read())
152 246 else:
153 log.debug('Exception File `%s` not found', exc_id_file_path)
247 log.debug("Exception File `%s` not found", exc_id_file_path)
154 248 return None
155 249
156 250
@@ -158,7 +252,7 b' def read_exception(exc_id, prefix=global'
158 252 try:
159 253 return _read_exception(exc_id=exc_id, prefix=prefix)
160 254 except Exception:
161 log.exception('Failed to read exception `%s` information', exc_id)
255 log.exception("Failed to read exception `%s` information", exc_id)
162 256 # there's no way this can fail, it will crash server badly if it does.
163 257 return None
164 258
@@ -170,6 +264,10 b' def delete_exception(exc_id, prefix=glob'
170 264 os.remove(exc_id_file_path)
171 265
172 266 except Exception:
173 log.exception('Failed to remove exception `%s` information', exc_id)
267 log.exception("Failed to remove exception `%s` information", exc_id)
174 268 # there's no way this can fail, it will crash server badly if it does.
175 269 pass
270
271
272 def generate_id():
273 return id(object())
@@ -1,7 +1,5 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
@@ -22,7 +20,7 b' import logging'
22 20
23 21 from repoze.lru import LRUCache
24 22
25 from vcsserver.utils import safe_str
23 from vcsserver.str_utils import safe_str
26 24
27 25 log = logging.getLogger(__name__)
28 26
@@ -45,7 +43,7 b' class LRUDict(LRUCache):'
45 43 del self.data[key]
46 44
47 45 def keys(self):
48 return self.data.keys()
46 return list(self.data.keys())
49 47
50 48
51 49 class LRUDictDebug(LRUDict):
@@ -53,11 +51,11 b' class LRUDictDebug(LRUDict):'
53 51 Wrapper to provide some debug options
54 52 """
55 53 def _report_keys(self):
56 elems_cnt = '%s/%s' % (len(self.keys()), self.size)
54 elems_cnt = f'{len(list(self.keys()))}/{self.size}'
57 55 # trick for pformat print it more nicely
58 56 fmt = '\n'
59 57 for cnt, elem in enumerate(self.keys()):
60 fmt += '%s - %s\n' % (cnt+1, safe_str(elem))
58 fmt += f'{cnt+1} - {safe_str(elem)}\n'
61 59 log.debug('current LRU keys (%s):%s', elems_cnt, fmt)
62 60
63 61 def __getitem__(self, key):
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -16,31 +16,59 b''
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import logging
19 import threading
20
19 21 from dogpile.cache import register_backend
20 22
23 from . import region_meta
24 from .utils import (
25 backend_key_generator,
26 clear_cache_namespace,
27 get_default_cache_settings,
28 get_or_create_region,
29 make_region,
30 str2bool,
31 )
32
33 module_name = 'vcsserver'
34
21 35 register_backend(
22 "dogpile.cache.rc.memory_lru", "vcsserver.lib.rc_cache.backends",
36 "dogpile.cache.rc.memory_lru", f"{module_name}.lib.rc_cache.backends",
23 37 "LRUMemoryBackend")
24 38
25 39 register_backend(
26 "dogpile.cache.rc.file_namespace", "vcsserver.lib.rc_cache.backends",
40 "dogpile.cache.rc.file_namespace", f"{module_name}.lib.rc_cache.backends",
27 41 "FileNamespaceBackend")
28 42
29 43 register_backend(
30 "dogpile.cache.rc.redis", "vcsserver.lib.rc_cache.backends",
44 "dogpile.cache.rc.redis", f"{module_name}.lib.rc_cache.backends",
31 45 "RedisPickleBackend")
32 46
33 47 register_backend(
34 "dogpile.cache.rc.redis_msgpack", "vcsserver.lib.rc_cache.backends",
48 "dogpile.cache.rc.redis_msgpack", f"{module_name}.lib.rc_cache.backends",
35 49 "RedisMsgPackBackend")
36 50
37 51
38 52 log = logging.getLogger(__name__)
39 53
40 from . import region_meta
41 from .utils import (
42 get_default_cache_settings, backend_key_generator, get_or_create_region,
43 clear_cache_namespace, make_region)
54
55 CACHE_OBJ_CACHE_VER = 'v2'
56
57 CLEAR_DELETE = 'delete'
58 CLEAR_INVALIDATE = 'invalidate'
59
60
61 def async_creation_runner(cache, cache_key, creator, mutex):
62
63 def runner():
64 try:
65 value = creator()
66 cache.set(cache_key, value)
67 finally:
68 mutex.release()
69
70 thread = threading.Thread(target=runner)
71 thread.start()
44 72
45 73
46 74 def configure_dogpile_cache(settings):
@@ -62,15 +90,22 b' def configure_dogpile_cache(settings):'
62 90
63 91 new_region = make_region(
64 92 name=namespace_name,
65 function_key_generator=None
93 function_key_generator=None,
94 async_creation_runner=None
66 95 )
67 96
68 new_region.configure_from_config(settings, 'rc_cache.{}.'.format(namespace_name))
97 new_region.configure_from_config(settings, f'rc_cache.{namespace_name}.')
69 98 new_region.function_key_generator = backend_key_generator(new_region.actual_backend)
99
100 async_creator = str2bool(settings.pop(f'rc_cache.{namespace_name}.async_creator', 'false'))
101 if async_creator:
102 log.debug('configuring region %s with async creator', new_region)
103 new_region.async_creation_runner = async_creation_runner
104
70 105 if log.isEnabledFor(logging.DEBUG):
71 region_args = dict(backend=new_region.actual_backend.__class__,
106 region_args = dict(backend=new_region.actual_backend,
72 107 region_invalidator=new_region.region_invalidator.__class__)
73 log.debug('dogpile: registering a new region `%s` %s', namespace_name, region_args)
108 log.debug('dogpile: registering a new region key=`%s` args=%s', namespace_name, region_args)
74 109
75 110 region_meta.dogpile_cache_regions[namespace_name] = new_region
76 111
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -15,25 +15,31 b''
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 import time
19 import errno
18 #import errno
19 import fcntl
20 import functools
20 21 import logging
22 import os
23 import pickle
24 #import time
21 25
26 #import gevent
22 27 import msgpack
23 28 import redis
24 29
25 from dogpile.cache.api import CachedValue
26 from dogpile.cache.backends import memory as memory_backend
30 flock_org = fcntl.flock
31 from typing import Union
32
33 from dogpile.cache.api import Deserializer, Serializer
27 34 from dogpile.cache.backends import file as file_backend
35 from dogpile.cache.backends import memory as memory_backend
28 36 from dogpile.cache.backends import redis as redis_backend
29 from dogpile.cache.backends.file import NO_VALUE, compat, FileLock
37 from dogpile.cache.backends.file import FileLock
30 38 from dogpile.cache.util import memoized_property
31 39
32 from pyramid.settings import asbool
33
34 40 from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug
35 from vcsserver.utils import safe_str
36
41 from vcsserver.str_utils import safe_bytes, safe_str
42 from vcsserver.type_utils import str2bool
37 43
38 44 _default_max_size = 1024
39 45
@@ -45,14 +51,20 b' class LRUMemoryBackend(memory_backend.Me'
45 51 pickle_values = False
46 52
47 53 def __init__(self, arguments):
48 max_size = arguments.pop('max_size', _default_max_size)
54 self.max_size = arguments.pop('max_size', _default_max_size)
49 55
50 56 LRUDictClass = LRUDict
51 57 if arguments.pop('log_key_count', None):
52 58 LRUDictClass = LRUDictDebug
53 59
54 arguments['cache_dict'] = LRUDictClass(max_size)
55 super(LRUMemoryBackend, self).__init__(arguments)
60 arguments['cache_dict'] = LRUDictClass(self.max_size)
61 super().__init__(arguments)
62
63 def __repr__(self):
64 return f'{self.__class__}(maxsize=`{self.max_size}`)'
65
66 def __str__(self):
67 return self.__repr__()
56 68
57 69 def delete(self, key):
58 70 try:
@@ -61,60 +73,37 b' class LRUMemoryBackend(memory_backend.Me'
61 73 # we don't care if key isn't there at deletion
62 74 pass
63 75
76 def list_keys(self, prefix):
77 return list(self._cache.keys())
78
64 79 def delete_multi(self, keys):
65 80 for key in keys:
66 81 self.delete(key)
67 82
68
69 class PickleSerializer(object):
70
71 def _dumps(self, value, safe=False):
72 try:
73 return compat.pickle.dumps(value)
74 except Exception:
75 if safe:
76 return NO_VALUE
77 else:
78 raise
79
80 def _loads(self, value, safe=True):
81 try:
82 return compat.pickle.loads(value)
83 except Exception:
84 if safe:
85 return NO_VALUE
86 else:
87 raise
83 def delete_multi_by_prefix(self, prefix):
84 cache_keys = self.list_keys(prefix=prefix)
85 num_affected_keys = len(cache_keys)
86 if num_affected_keys:
87 self.delete_multi(cache_keys)
88 return num_affected_keys
88 89
89 90
90 class MsgPackSerializer(object):
91
92 def _dumps(self, value, safe=False):
93 try:
94 return msgpack.packb(value)
95 except Exception:
96 if safe:
97 return NO_VALUE
98 else:
99 raise
100
101 def _loads(self, value, safe=True):
102 """
103 pickle maintained the `CachedValue` wrapper of the tuple
104 msgpack does not, so it must be added back in.
105 """
106 try:
107 value = msgpack.unpackb(value, use_list=False)
108 return CachedValue(*value)
109 except Exception:
110 if safe:
111 return NO_VALUE
112 else:
113 raise
91 class PickleSerializer:
92 serializer: None | Serializer = staticmethod( # type: ignore
93 functools.partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL)
94 )
95 deserializer: None | Deserializer = staticmethod( # type: ignore
96 functools.partial(pickle.loads)
97 )
114 98
115 99
116 import fcntl
117 flock_org = fcntl.flock
100 class MsgPackSerializer:
101 serializer: None | Serializer = staticmethod( # type: ignore
102 msgpack.packb
103 )
104 deserializer: None | Deserializer = staticmethod( # type: ignore
105 functools.partial(msgpack.unpackb, use_list=False)
106 )
118 107
119 108
120 109 class CustomLockFactory(FileLock):
@@ -129,80 +118,75 b' class FileNamespaceBackend(PickleSeriali'
129 118 arguments['lock_factory'] = CustomLockFactory
130 119 db_file = arguments.get('filename')
131 120
132 log.debug('initialing %s DB in %s', self.__class__.__name__, db_file)
121 log.debug('initialing cache-backend=%s db in %s', self.__class__.__name__, db_file)
122 db_file_dir = os.path.dirname(db_file)
123 if not os.path.isdir(db_file_dir):
124 os.makedirs(db_file_dir)
125
133 126 try:
134 super(FileNamespaceBackend, self).__init__(arguments)
127 super().__init__(arguments)
135 128 except Exception:
136 log.error('Failed to initialize db at: %s', db_file)
129 log.exception('Failed to initialize db at: %s', db_file)
137 130 raise
138 131
139 132 def __repr__(self):
140 return '{} `{}`'.format(self.__class__, self.filename)
133 return f'{self.__class__}(file=`{self.filename}`)'
134
135 def __str__(self):
136 return self.__repr__()
141 137
142 def list_keys(self, prefix=''):
143 prefix = '{}:{}'.format(self.key_prefix, prefix)
138 def _get_keys_pattern(self, prefix: bytes = b''):
139 return b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
144 140
145 def cond(v):
141 def list_keys(self, prefix: bytes = b''):
142 prefix = self._get_keys_pattern(prefix)
143
144 def cond(dbm_key: bytes):
146 145 if not prefix:
147 146 return True
148 147
149 if v.startswith(prefix):
148 if dbm_key.startswith(prefix):
150 149 return True
151 150 return False
152 151
153 152 with self._dbm_file(True) as dbm:
154 153 try:
155 return filter(cond, dbm.keys())
154 return list(filter(cond, dbm.keys()))
156 155 except Exception:
157 156 log.error('Failed to fetch DBM keys from DB: %s', self.get_store())
158 157 raise
159 158
159 def delete_multi_by_prefix(self, prefix):
160 cache_keys = self.list_keys(prefix=prefix)
161 num_affected_keys = len(cache_keys)
162 if num_affected_keys:
163 self.delete_multi(cache_keys)
164 return num_affected_keys
165
160 166 def get_store(self):
161 167 return self.filename
162 168
163 def _dbm_get(self, key):
164 with self._dbm_file(False) as dbm:
165 if hasattr(dbm, 'get'):
166 value = dbm.get(key, NO_VALUE)
167 else:
168 # gdbm objects lack a .get method
169 try:
170 value = dbm[key]
171 except KeyError:
172 value = NO_VALUE
173 if value is not NO_VALUE:
174 value = self._loads(value)
175 return value
176
177 def get(self, key):
178 try:
179 return self._dbm_get(key)
180 except Exception:
181 log.error('Failed to fetch DBM key %s from DB: %s', key, self.get_store())
182 raise
183
184 def set(self, key, value):
185 with self._dbm_file(True) as dbm:
186 dbm[key] = self._dumps(value)
187
188 def set_multi(self, mapping):
189 with self._dbm_file(True) as dbm:
190 for key, value in mapping.items():
191 dbm[key] = self._dumps(value)
192
193 169
194 170 class BaseRedisBackend(redis_backend.RedisBackend):
195 171 key_prefix = ''
196 172
197 173 def __init__(self, arguments):
198 super(BaseRedisBackend, self).__init__(arguments)
174 self.db_conn = arguments.get('host', '') or arguments.get('url', '') or 'redis-host'
175 super().__init__(arguments)
176
199 177 self._lock_timeout = self.lock_timeout
200 self._lock_auto_renewal = asbool(arguments.pop("lock_auto_renewal", True))
178 self._lock_auto_renewal = str2bool(arguments.pop("lock_auto_renewal", True))
201 179
202 180 if self._lock_auto_renewal and not self._lock_timeout:
203 181 # set default timeout for auto_renewal
204 182 self._lock_timeout = 30
205 183
184 def __repr__(self):
185 return f'{self.__class__}(conn=`{self.db_conn}`)'
186
187 def __str__(self):
188 return self.__repr__()
189
206 190 def _create_client(self):
207 191 args = {}
208 192
@@ -216,58 +200,48 b' class BaseRedisBackend(redis_backend.Red'
216 200 )
217 201
218 202 connection_pool = redis.ConnectionPool(**args)
203 self.writer_client = redis.StrictRedis(
204 connection_pool=connection_pool
205 )
206 self.reader_client = self.writer_client
219 207
220 return redis.StrictRedis(connection_pool=connection_pool)
208 def _get_keys_pattern(self, prefix: bytes = b''):
209 return b'%b:%b*' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
210
211 def list_keys(self, prefix: bytes = b''):
212 prefix = self._get_keys_pattern(prefix)
213 return self.reader_client.keys(prefix)
221 214
222 def list_keys(self, prefix=''):
223 prefix = '{}:{}*'.format(self.key_prefix, prefix)
224 return self.client.keys(prefix)
215 def delete_multi_by_prefix(self, prefix, use_lua=False):
216 if use_lua:
217 # highly efficient Lua script to delete ALL keys by prefix...
218 lua = """local keys = redis.call('keys', ARGV[1])
219 for i=1,#keys,5000 do
220 redis.call('del', unpack(keys, i, math.min(i+(5000-1), #keys)))
221 end
222 return #keys"""
223 num_affected_keys = self.writer_client.eval(
224 lua,
225 0,
226 f"{prefix}*")
227 else:
228 cache_keys = self.list_keys(prefix=prefix)
229 num_affected_keys = len(cache_keys)
230 if num_affected_keys:
231 self.delete_multi(cache_keys)
232 return num_affected_keys
225 233
226 234 def get_store(self):
227 return self.client.connection_pool
228
229 def get(self, key):
230 value = self.client.get(key)
231 if value is None:
232 return NO_VALUE
233 return self._loads(value)
234
235 def get_multi(self, keys):
236 if not keys:
237 return []
238 values = self.client.mget(keys)
239 loads = self._loads
240 return [
241 loads(v) if v is not None else NO_VALUE
242 for v in values]
243
244 def set(self, key, value):
245 if self.redis_expiration_time:
246 self.client.setex(key, self.redis_expiration_time,
247 self._dumps(value))
248 else:
249 self.client.set(key, self._dumps(value))
250
251 def set_multi(self, mapping):
252 dumps = self._dumps
253 mapping = dict(
254 (k, dumps(v))
255 for k, v in mapping.items()
256 )
257
258 if not self.redis_expiration_time:
259 self.client.mset(mapping)
260 else:
261 pipe = self.client.pipeline()
262 for key, value in mapping.items():
263 pipe.setex(key, self.redis_expiration_time, value)
264 pipe.execute()
235 return self.reader_client.connection_pool
265 236
266 237 def get_mutex(self, key):
267 238 if self.distributed_lock:
268 lock_key = redis_backend.u('_lock_{0}').format(safe_str(key))
269 return get_mutex_lock(self.client, lock_key, self._lock_timeout,
270 auto_renewal=self._lock_auto_renewal)
239 lock_key = f'_lock_{safe_str(key)}'
240 return get_mutex_lock(
241 self.writer_client, lock_key,
242 self._lock_timeout,
243 auto_renewal=self._lock_auto_renewal
244 )
271 245 else:
272 246 return None
273 247
@@ -283,9 +257,9 b' class RedisMsgPackBackend(MsgPackSeriali'
283 257
284 258
285 259 def get_mutex_lock(client, lock_key, lock_timeout, auto_renewal=False):
286 import redis_lock
260 from vcsserver.lib._vendor import redis_lock
287 261
288 class _RedisLockWrapper(object):
262 class _RedisLockWrapper:
289 263 """LockWrapper for redis_lock"""
290 264
291 265 @classmethod
@@ -299,10 +273,10 b' def get_mutex_lock(client, lock_key, loc'
299 273 )
300 274
301 275 def __repr__(self):
302 return "{}:{}".format(self.__class__.__name__, lock_key)
276 return f"{self.__class__.__name__}:{lock_key}"
303 277
304 278 def __str__(self):
305 return "{}:{}".format(self.__class__.__name__, lock_key)
279 return f"{self.__class__.__name__}:{lock_key}"
306 280
307 281 def __init__(self):
308 282 self.lock = self.get_lock()
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -15,115 +15,69 b''
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 import os
19 import time
18 import functools
20 19 import logging
21 import functools
20 import os
21 import threading
22 import time
22 23
24 import decorator
23 25 from dogpile.cache import CacheRegion
24 from dogpile.cache.util import compat
26
25 27
26 from vcsserver.utils import safe_str, sha1
28 from vcsserver.utils import sha1
29 from vcsserver.str_utils import safe_bytes
30 from vcsserver.type_utils import str2bool # noqa :required by imports from .utils
27 31
28 from vcsserver.lib.rc_cache import region_meta
32 from . import region_meta
29 33
30 34 log = logging.getLogger(__name__)
31 35
32 36
33 37 class RhodeCodeCacheRegion(CacheRegion):
34 38
39 def __repr__(self):
40 return f'`{self.__class__.__name__}(name={self.name}, backend={self.backend.__class__})`'
41
35 42 def conditional_cache_on_arguments(
36 43 self, namespace=None,
37 44 expiration_time=None,
38 45 should_cache_fn=None,
39 to_str=compat.string_type,
46 to_str=str,
40 47 function_key_generator=None,
41 48 condition=True):
42 49 """
43 50 Custom conditional decorator, that will not touch any dogpile internals if
44 condition isn't met. This works a bit different than should_cache_fn
51 condition isn't met. This works a bit different from should_cache_fn
45 52 And it's faster in cases we don't ever want to compute cached values
46 53 """
47 expiration_time_is_callable = compat.callable(expiration_time)
54 expiration_time_is_callable = callable(expiration_time)
55 if not namespace:
56 namespace = getattr(self, '_default_namespace', None)
48 57
49 58 if function_key_generator is None:
50 59 function_key_generator = self.function_key_generator
51 60
52 # workaround for py2 and cython problems, this block should be removed
53 # once we've migrated to py3
54 if 'cython' == 'cython':
55 def decorator(fn):
56 if to_str is compat.string_type:
57 # backwards compatible
58 key_generator = function_key_generator(namespace, fn)
59 else:
60 key_generator = function_key_generator(namespace, fn, to_str=to_str)
61
62 @functools.wraps(fn)
63 def decorate(*arg, **kw):
64 key = key_generator(*arg, **kw)
65
66 @functools.wraps(fn)
67 def creator():
68 return fn(*arg, **kw)
69
70 if not condition:
71 return creator()
72
73 timeout = expiration_time() if expiration_time_is_callable \
74 else expiration_time
75
76 return self.get_or_create(key, creator, timeout, should_cache_fn)
77
78 def invalidate(*arg, **kw):
79 key = key_generator(*arg, **kw)
80 self.delete(key)
81
82 def set_(value, *arg, **kw):
83 key = key_generator(*arg, **kw)
84 self.set(key, value)
85
86 def get(*arg, **kw):
87 key = key_generator(*arg, **kw)
88 return self.get(key)
89
90 def refresh(*arg, **kw):
91 key = key_generator(*arg, **kw)
92 value = fn(*arg, **kw)
93 self.set(key, value)
94 return value
95
96 decorate.set = set_
97 decorate.invalidate = invalidate
98 decorate.refresh = refresh
99 decorate.get = get
100 decorate.original = fn
101 decorate.key_generator = key_generator
102 decorate.__wrapped__ = fn
103
104 return decorate
105 return decorator
106
107 def get_or_create_for_user_func(key_generator, user_func, *arg, **kw):
61 def get_or_create_for_user_func(func_key_generator, user_func, *arg, **kw):
108 62
109 63 if not condition:
110 log.debug('Calling un-cached method:%s', user_func.func_name)
64 log.debug('Calling un-cached method:%s', user_func.__name__)
111 65 start = time.time()
112 66 result = user_func(*arg, **kw)
113 67 total = time.time() - start
114 log.debug('un-cached method:%s took %.4fs', user_func.func_name, total)
68 log.debug('un-cached method:%s took %.4fs', user_func.__name__, total)
115 69 return result
116 70
117 key = key_generator(*arg, **kw)
71 key = func_key_generator(*arg, **kw)
118 72
119 73 timeout = expiration_time() if expiration_time_is_callable \
120 74 else expiration_time
121 75
122 log.debug('Calling cached method:`%s`', user_func.func_name)
76 log.debug('Calling cached method:`%s`', user_func.__name__)
123 77 return self.get_or_create(key, user_func, timeout, should_cache_fn, (arg, kw))
124 78
125 79 def cache_decorator(user_func):
126 if to_str is compat.string_type:
80 if to_str is str:
127 81 # backwards compatible
128 82 key_generator = function_key_generator(namespace, user_func)
129 83 else:
@@ -176,7 +130,7 b' def get_default_cache_settings(settings,'
176 130 if key.startswith(prefix):
177 131 name = key.split(prefix)[1].strip()
178 132 val = settings[key]
179 if isinstance(val, compat.string_types):
133 if isinstance(val, str):
180 134 val = val.strip()
181 135 cache_settings[name] = val
182 136 return cache_settings
@@ -186,7 +140,21 b' def compute_key_from_params(*args):'
186 140 """
187 141 Helper to compute key from given params to be used in cache manager
188 142 """
189 return sha1("_".join(map(safe_str, args)))
143 return sha1(safe_bytes("_".join(map(str, args))))
144
145
146 def custom_key_generator(backend, namespace, fn):
147 func_name = fn.__name__
148
149 def generate_key(*args):
150 backend_pref = getattr(backend, 'key_prefix', None) or 'backend_prefix'
151 namespace_pref = namespace or 'default_namespace'
152 arg_key = compute_key_from_params(*args)
153 final_key = f"{backend_pref}:{namespace_pref}:{func_name}_{arg_key}"
154
155 return final_key
156
157 return generate_key
190 158
191 159
192 160 def backend_key_generator(backend):
@@ -194,49 +162,51 b' def backend_key_generator(backend):'
194 162 Special wrapper that also sends over the backend to the key generator
195 163 """
196 164 def wrapper(namespace, fn):
197 return key_generator(backend, namespace, fn)
165 return custom_key_generator(backend, namespace, fn)
198 166 return wrapper
199 167
200 168
201 def key_generator(backend, namespace, fn):
202 fname = fn.__name__
169 def get_or_create_region(region_name, region_namespace: str = None, use_async_runner=False):
170 from .backends import FileNamespaceBackend
171 from . import async_creation_runner
203 172
204 def generate_key(*args):
205 backend_prefix = getattr(backend, 'key_prefix', None) or 'backend_prefix'
206 namespace_pref = namespace or 'default_namespace'
207 arg_key = compute_key_from_params(*args)
208 final_key = "{}:{}:{}_{}".format(backend_prefix, namespace_pref, fname, arg_key)
209
210 return final_key
211
212 return generate_key
213
214
215 def get_or_create_region(region_name, region_namespace=None):
216 from vcsserver.lib.rc_cache.backends import FileNamespaceBackend
217 173 region_obj = region_meta.dogpile_cache_regions.get(region_name)
218 174 if not region_obj:
219 raise EnvironmentError(
220 'Region `{}` not in configured: {}.'.format(
221 region_name, region_meta.dogpile_cache_regions.keys()))
175 reg_keys = list(region_meta.dogpile_cache_regions.keys())
176 raise OSError(f'Region `{region_name}` not in configured: {reg_keys}.')
177
178 region_uid_name = f'{region_name}:{region_namespace}'
222 179
223 region_uid_name = '{}:{}'.format(region_name, region_namespace)
180 # Special case for ONLY the FileNamespaceBackend backend. We register one-file-per-region
224 181 if isinstance(region_obj.actual_backend, FileNamespaceBackend):
182 if not region_namespace:
183 raise ValueError(f'{FileNamespaceBackend} used requires to specify region_namespace param')
184
225 185 region_exist = region_meta.dogpile_cache_regions.get(region_namespace)
226 186 if region_exist:
227 187 log.debug('Using already configured region: %s', region_namespace)
228 188 return region_exist
229 cache_dir = region_meta.dogpile_config_defaults['cache_dir']
189
230 190 expiration_time = region_obj.expiration_time
231 191
232 if not os.path.isdir(cache_dir):
233 os.makedirs(cache_dir)
192 cache_dir = region_meta.dogpile_config_defaults['cache_dir']
193 namespace_cache_dir = cache_dir
194
195 # we default the namespace_cache_dir to our default cache dir.
196 # however, if this backend is configured with filename= param, we prioritize that
197 # so all caches within that particular region, even those namespaced end up in the same path
198 if region_obj.actual_backend.filename:
199 namespace_cache_dir = os.path.dirname(region_obj.actual_backend.filename)
200
201 if not os.path.isdir(namespace_cache_dir):
202 os.makedirs(namespace_cache_dir)
234 203 new_region = make_region(
235 204 name=region_uid_name,
236 205 function_key_generator=backend_key_generator(region_obj.actual_backend)
237 206 )
207
238 208 namespace_filename = os.path.join(
239 cache_dir, "{}.cache.dbm".format(region_namespace))
209 namespace_cache_dir, f"{region_name}_{region_namespace}.cache_db")
240 210 # special type that allows 1db per namespace
241 211 new_region.configure(
242 212 backend='dogpile.cache.rc.file_namespace',
@@ -248,16 +218,28 b' def get_or_create_region(region_name, re'
248 218 log.debug('configuring new region: %s', region_uid_name)
249 219 region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region
250 220
221 region_obj._default_namespace = region_namespace
222 if use_async_runner:
223 region_obj.async_creation_runner = async_creation_runner
251 224 return region_obj
252 225
253 226
254 def clear_cache_namespace(cache_region, cache_namespace_uid, invalidate=False):
255 region = get_or_create_region(cache_region, cache_namespace_uid)
256 cache_keys = region.backend.list_keys(prefix=cache_namespace_uid)
257 num_delete_keys = len(cache_keys)
258 if invalidate:
259 region.invalidate(hard=False)
260 else:
261 if num_delete_keys:
262 region.delete_multi(cache_keys)
263 return num_delete_keys
227 def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, method: str) -> int:
228 from . import CLEAR_DELETE, CLEAR_INVALIDATE
229
230 if not isinstance(cache_region, RhodeCodeCacheRegion):
231 cache_region = get_or_create_region(cache_region, cache_namespace_uid)
232 log.debug('clearing cache region: %s [prefix:%s] with method=%s',
233 cache_region, cache_namespace_uid, method)
234
235 num_affected_keys = 0
236
237 if method == CLEAR_INVALIDATE:
238 # NOTE: The CacheRegion.invalidate() method’s default mode of
239 # operation is to set a timestamp local to this CacheRegion in this Python process only.
240 # It does not impact other Python processes or regions as the timestamp is only stored locally in memory.
241 cache_region.invalidate(hard=True)
242
243 if method == CLEAR_DELETE:
244 num_affected_keys = cache_region.backend.delete_multi_by_prefix(prefix=cache_namespace_uid)
245 return num_affected_keys
@@ -1,7 +1,5 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -21,17 +21,19 b' import os'
21 21 import socket
22 22 import logging
23 23
24 import simplejson as json
25 24 import dulwich.protocol
25 from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
26 26 from webob import Request, Response, exc
27 27
28 from vcsserver.lib.rc_json import json
28 29 from vcsserver import hooks, subprocessio
30 from vcsserver.str_utils import ascii_bytes
29 31
30 32
31 33 log = logging.getLogger(__name__)
32 34
33 35
34 class FileWrapper(object):
36 class FileWrapper:
35 37 """File wrapper that ensures how much data is read from it."""
36 38
37 39 def __init__(self, fd, content_length):
@@ -54,34 +56,33 b' class FileWrapper(object):'
54 56 return data
55 57
56 58 def __repr__(self):
57 return '<FileWrapper %s len: %s, read: %s>' % (
59 return '<FileWrapper {} len: {}, read: {}>'.format(
58 60 self.fd, self.content_length, self.content_length - self.remain
59 61 )
60 62
61 63
62 class GitRepository(object):
64 class GitRepository:
63 65 """WSGI app for handling Git smart protocol endpoints."""
64 66
65 git_folder_signature = frozenset(
66 ('config', 'head', 'info', 'objects', 'refs'))
67 git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
67 68 commands = frozenset(('git-upload-pack', 'git-receive-pack'))
68 valid_accepts = frozenset(('application/x-%s-result' %
69 c for c in commands))
69 valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)
70 70
71 71 # The last bytes are the SHA1 of the first 12 bytes.
72 72 EMPTY_PACK = (
73 'PACK\x00\x00\x00\x02\x00\x00\x00\x00' +
74 '\x02\x9d\x08\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
73 b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
74 b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
75 75 )
76 SIDE_BAND_CAPS = frozenset(('side-band', 'side-band-64k'))
76 FLUSH_PACKET = b"0000"
77 77
78 def __init__(self, repo_name, content_path, git_path, update_server_info,
79 extras):
78 SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))
79
80 def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
80 81 files = frozenset(f.lower() for f in os.listdir(content_path))
81 82 valid_dir_signature = self.git_folder_signature.issubset(files)
82 83
83 84 if not valid_dir_signature:
84 raise OSError('%s missing git signature' % content_path)
85 raise OSError(f'{content_path} missing git signature')
85 86
86 87 self.content_path = content_path
87 88 self.repo_name = repo_name
@@ -122,8 +123,8 b' class GitRepository(object):'
122 123 # blows up if you sprinkle "flush" (0000) as "0001\n".
123 124 # It reads binary, per number of bytes specified.
124 125 # if you do add '\n' as part of data, count it.
125 server_advert = '# service=%s\n' % git_command
126 packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower()
126 server_advert = f'# service={git_command}\n'
127 packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
127 128 try:
128 129 gitenv = dict(os.environ)
129 130 # forget all configs
@@ -133,15 +134,15 b' class GitRepository(object):'
133 134 out = subprocessio.SubprocessIOChunker(
134 135 command,
135 136 env=gitenv,
136 starting_values=[packet_len + server_advert + '0000'],
137 starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
137 138 shell=False
138 139 )
139 except EnvironmentError:
140 except OSError:
140 141 log.exception('Error processing command')
141 142 raise exc.HTTPExpectationFailed()
142 143
143 144 resp = Response()
144 resp.content_type = 'application/x-%s-advertisement' % str(git_command)
145 resp.content_type = f'application/x-{git_command}-advertisement'
145 146 resp.charset = None
146 147 resp.app_iter = out
147 148
@@ -166,34 +167,103 b' class GitRepository(object):'
166 167 We also print in the error output a message explaining why the command
167 168 was aborted.
168 169
169 If aditionally, the user is accepting messages we send them the output
170 If additionally, the user is accepting messages we send them the output
170 171 of the pre-pull hook.
171 172
172 173 Note that for clients not supporting side-band we just send them the
173 174 emtpy PACK file.
174 175 """
176
175 177 if self.SIDE_BAND_CAPS.intersection(capabilities):
176 178 response = []
177 179 proto = dulwich.protocol.Protocol(None, response.append)
178 proto.write_pkt_line('NAK\n')
179 self._write_sideband_to_proto(pre_pull_messages, proto,
180 capabilities)
180 proto.write_pkt_line(dulwich.protocol.NAK_LINE)
181
182 self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
181 183 # N.B.(skreft): Do not change the sideband channel to 3, as that
182 184 # produces a fatal error in the client:
183 185 # fatal: error in sideband demultiplexer
184 proto.write_sideband(2, 'Pre pull hook failed: aborting\n')
185 proto.write_sideband(1, self.EMPTY_PACK)
186 proto.write_sideband(
187 dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
188 ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
189 proto.write_sideband(
190 dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
191 ascii_bytes(self.EMPTY_PACK, allow_bytes=True))
186 192
187 # writes 0000
193 # writes b"0000" as default
188 194 proto.write_pkt_line(None)
189 195
190 196 return response
191 197 else:
192 return [self.EMPTY_PACK]
198 return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]
199
200 def _build_post_pull_response(self, response, capabilities, start_message, end_message):
201 """
202 Given a list response we inject the post-pull messages.
203
204 We only inject the messages if the client supports sideband, and the
205 response has the format:
206 0008NAK\n...0000
207
208 Note that we do not check the no-progress capability as by default, git
209 sends it, which effectively would block all messages.
210 """
211
212 if not self.SIDE_BAND_CAPS.intersection(capabilities):
213 return response
214
215 if not start_message and not end_message:
216 return response
217
218 try:
219 iter(response)
220 # iterator probably will work, we continue
221 except TypeError:
222 raise TypeError(f'response must be an iterator: got {type(response)}')
223 if isinstance(response, (list, tuple)):
224 raise TypeError(f'response must be an iterator: got {type(response)}')
225
226 def injected_response():
193 227
194 def _write_sideband_to_proto(self, data, proto, capabilities):
228 do_loop = 1
229 header_injected = 0
230 next_item = None
231 has_item = False
232 item = b''
233
234 while do_loop:
235
236 try:
237 next_item = next(response)
238 except StopIteration:
239 do_loop = 0
240
241 if has_item:
242 # last item ! alter it now
243 if do_loop == 0 and item.endswith(self.FLUSH_PACKET):
244 new_response = [item[:-4]]
245 new_response.extend(self._get_messages(end_message, capabilities))
246 new_response.append(self.FLUSH_PACKET)
247 item = b''.join(new_response)
248
249 yield item
250
251 has_item = True
252 item = next_item
253
254 # alter item if it's the initial chunk
255 if not header_injected and item.startswith(b'0008NAK\n'):
256 new_response = [b'0008NAK\n']
257 new_response.extend(self._get_messages(start_message, capabilities))
258 new_response.append(item[8:])
259 item = b''.join(new_response)
260 header_injected = 1
261
262 return injected_response()
263
264 def _write_sideband_to_proto(self, proto, data, capabilities):
195 265 """
196 Write the data to the proto's sideband number 2.
266 Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS
197 267
198 268 We do not use dulwich's write_sideband directly as it only supports
199 269 side-band-64k.
@@ -204,68 +274,27 b' class GitRepository(object):'
204 274 # N.B.(skreft): The values below are explained in the pack protocol
205 275 # documentation, section Packfile Data.
206 276 # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
207 if 'side-band-64k' in capabilities:
277 if CAPABILITY_SIDE_BAND_64K in capabilities:
208 278 chunk_size = 65515
209 elif 'side-band' in capabilities:
279 elif CAPABILITY_SIDE_BAND in capabilities:
210 280 chunk_size = 995
211 281 else:
212 282 return
213 283
214 chunker = (
215 data[i:i + chunk_size] for i in xrange(0, len(data), chunk_size))
284 chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))
216 285
217 286 for chunk in chunker:
218 proto.write_sideband(2, chunk)
287 proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))
219 288
220 289 def _get_messages(self, data, capabilities):
221 290 """Return a list with packets for sending data in sideband number 2."""
222 291 response = []
223 292 proto = dulwich.protocol.Protocol(None, response.append)
224 293
225 self._write_sideband_to_proto(data, proto, capabilities)
294 self._write_sideband_to_proto(proto, data, capabilities)
226 295
227 296 return response
228 297
229 def _inject_messages_to_response(self, response, capabilities,
230 start_messages, end_messages):
231 """
232 Given a list response we inject the pre/post-pull messages.
233
234 We only inject the messages if the client supports sideband, and the
235 response has the format:
236 0008NAK\n...0000
237
238 Note that we do not check the no-progress capability as by default, git
239 sends it, which effectively would block all messages.
240 """
241 if not self.SIDE_BAND_CAPS.intersection(capabilities):
242 return response
243
244 if not start_messages and not end_messages:
245 return response
246
247 # make a list out of response if it's an iterator
248 # so we can investigate it for message injection.
249 if hasattr(response, '__iter__'):
250 response = list(response)
251
252 if (not response[0].startswith('0008NAK\n') or
253 not response[-1].endswith('0000')):
254 return response
255
256 new_response = ['0008NAK\n']
257 new_response.extend(self._get_messages(start_messages, capabilities))
258 if len(response) == 1:
259 new_response.append(response[0][8:-4])
260 else:
261 new_response.append(response[0][8:])
262 new_response.extend(response[1:-1])
263 new_response.append(response[-1][:-4])
264 new_response.extend(self._get_messages(end_messages, capabilities))
265 new_response.append('0000')
266
267 return new_response
268
269 298 def backend(self, request, environ):
270 299 """
271 300 WSGI Response producer for HTTP POST Git Smart HTTP requests.
@@ -304,14 +333,15 b' class GitRepository(object):'
304 333 inputstream = request.body_file_seekable
305 334
306 335 resp = Response()
307 resp.content_type = ('application/x-%s-result' %
308 git_command.encode('utf8'))
336 resp.content_type = f'application/x-{git_command}-result'
309 337 resp.charset = None
310 338
311 339 pre_pull_messages = ''
340 # Upload-pack == clone
312 341 if git_command == 'git-upload-pack':
313 status, pre_pull_messages = hooks.git_pre_pull(self.extras)
314 if status != 0:
342 hook_response = hooks.git_pre_pull(self.extras)
343 if hook_response.status != 0:
344 pre_pull_messages = hook_response.output
315 345 resp.app_iter = self._build_failed_pre_pull_response(
316 346 capabilities, pre_pull_messages)
317 347 return resp
@@ -326,7 +356,7 b' class GitRepository(object):'
326 356
327 357 out = subprocessio.SubprocessIOChunker(
328 358 cmd,
329 inputstream=inputstream,
359 input_stream=inputstream,
330 360 env=gitenv,
331 361 cwd=self.content_path,
332 362 shell=False,
@@ -346,7 +376,7 b' class GitRepository(object):'
346 376 log.debug('handling cmd %s', cmd)
347 377 output = subprocessio.SubprocessIOChunker(
348 378 cmd,
349 inputstream=inputstream,
379 input_stream=inputstream,
350 380 env=gitenv,
351 381 cwd=self.content_path,
352 382 shell=False,
@@ -357,10 +387,11 b' class GitRepository(object):'
357 387 for _ in output:
358 388 pass
359 389
390 # Upload-pack == clone
360 391 if git_command == 'git-upload-pack':
361 unused_status, post_pull_messages = hooks.git_post_pull(self.extras)
362 resp.app_iter = self._inject_messages_to_response(
363 out, capabilities, pre_pull_messages, post_pull_messages)
392 hook_response = hooks.git_post_pull(self.extras)
393 post_pull_messages = hook_response.output
394 resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
364 395 else:
365 396 resp.app_iter = out
366 397
This diff has been collapsed as it changes many lines, (787 lines changed) Show them Hide them
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -18,12 +18,12 b''
18 18 import collections
19 19 import logging
20 20 import os
21 import posixpath as vcspath
22 21 import re
23 22 import stat
24 23 import traceback
25 import urllib
26 import urllib2
24 import urllib.request
25 import urllib.parse
26 import urllib.error
27 27 from functools import wraps
28 28
29 29 import more_itertools
@@ -31,17 +31,17 b' import pygit2'
31 31 from pygit2 import Repository as LibGit2Repo
32 32 from pygit2 import index as LibGit2Index
33 33 from dulwich import index, objects
34 from dulwich.client import HttpGitClient, LocalGitClient
34 from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult
35 35 from dulwich.errors import (
36 36 NotGitRepository, ChecksumMismatch, WrongObjectException,
37 37 MissingCommitError, ObjectMissing, HangupException,
38 38 UnexpectedCommandError)
39 39 from dulwich.repo import Repo as DulwichRepo
40 from dulwich.server import update_server_info
41 40
41 import rhodecode
42 42 from vcsserver import exceptions, settings, subprocessio
43 from vcsserver.utils import safe_str, safe_int, safe_unicode
44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo
43 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes, convert_to_str
44 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope
45 45 from vcsserver.hgcompat import (
46 46 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
47 47 from vcsserver.git_lfs.lib import LFSOidStore
@@ -50,19 +50,12 b' from vcsserver.vcs_base import RemoteBas'
50 50 DIR_STAT = stat.S_IFDIR
51 51 FILE_MODE = stat.S_IFMT
52 52 GIT_LINK = objects.S_IFGITLINK
53 PEELED_REF_MARKER = '^{}'
54
53 PEELED_REF_MARKER = b'^{}'
54 HEAD_MARKER = b'HEAD'
55 55
56 56 log = logging.getLogger(__name__)
57 57
58 58
59 def str_to_dulwich(value):
60 """
61 Dulwich 0.10.1a requires `unicode` objects to be passed in.
62 """
63 return value.decode(settings.WIRE_ENCODING)
64
65
66 59 def reraise_safe_exceptions(func):
67 60 """Converts Dulwich exceptions to something neutral."""
68 61
@@ -76,8 +69,8 b' def reraise_safe_exceptions(func):'
76 69 except (HangupException, UnexpectedCommandError) as e:
77 70 exc = exceptions.VcsException(org_exc=e)
78 71 raise exc(safe_str(e))
79 except Exception as e:
80 # NOTE(marcink): becuase of how dulwich handles some exceptions
72 except Exception:
73 # NOTE(marcink): because of how dulwich handles some exceptions
81 74 # (KeyError on empty repos), we cannot track this and catch all
82 75 # exceptions, it's an exceptions from other handlers
83 76 #if not hasattr(e, '_vcs_kind'):
@@ -114,10 +107,14 b' class GitFactory(RepoFactory):'
114 107
115 108 def _create_repo(self, wire, create, use_libgit2=False):
116 109 if use_libgit2:
117 return Repository(wire['path'])
110 repo = Repository(safe_bytes(wire['path']))
118 111 else:
119 repo_path = str_to_dulwich(wire['path'])
120 return Repo(repo_path)
112 # dulwich mode
113 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
114 repo = Repo(repo_path)
115
116 log.debug('repository created: got GIT object: %s', repo)
117 return repo
121 118
122 119 def repo(self, wire, create=False, use_libgit2=False):
123 120 """
@@ -129,6 +126,28 b' class GitFactory(RepoFactory):'
129 126 return self.repo(wire, use_libgit2=True)
130 127
131 128
129 def create_signature_from_string(author_str, **kwargs):
130 """
131 Creates a pygit2.Signature object from a string of the format 'Name <email>'.
132
133 :param author_str: String of the format 'Name <email>'
134 :return: pygit2.Signature object
135 """
136 match = re.match(r'^(.+) <(.+)>$', author_str)
137 if match is None:
138 raise ValueError(f"Invalid format: {author_str}")
139
140 name, email = match.groups()
141 return pygit2.Signature(name, email, **kwargs)
142
143
144 def get_obfuscated_url(url_obj):
145 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
146 url_obj.query = obfuscate_qs(url_obj.query)
147 obfuscated_uri = str(url_obj)
148 return obfuscated_uri
149
150
132 151 class GitRemote(RemoteBase):
133 152
134 153 def __init__(self, factory):
@@ -141,35 +160,45 b' class GitRemote(RemoteBase):'
141 160 "parents": self.parents,
142 161 "_commit": self.revision,
143 162 }
163 self._bulk_file_methods = {
164 "size": self.get_node_size,
165 "data": self.get_node_data,
166 "flags": self.get_node_flags,
167 "is_binary": self.get_node_is_binary,
168 "md5": self.md5_hash
169 }
144 170
145 171 def _wire_to_config(self, wire):
146 172 if 'config' in wire:
147 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
173 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
148 174 return {}
149 175
150 176 def _remote_conf(self, config):
151 177 params = [
152 178 '-c', 'core.askpass=""',
153 179 ]
154 ssl_cert_dir = config.get('vcs_ssl_dir')
155 if ssl_cert_dir:
156 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
180 config_attrs = {
181 'vcs_ssl_dir': 'http.sslCAinfo={}',
182 'vcs_git_lfs_store_location': 'lfs.storage={}'
183 }
184 for key, param in config_attrs.items():
185 if value := config.get(key):
186 params.extend(['-c', param.format(value)])
157 187 return params
158 188
159 189 @reraise_safe_exceptions
160 190 def discover_git_version(self):
161 191 stdout, _ = self.run_git_command(
162 192 {}, ['--version'], _bare=True, _safe=True)
163 prefix = 'git version'
193 prefix = b'git version'
164 194 if stdout.startswith(prefix):
165 195 stdout = stdout[len(prefix):]
166 return stdout.strip()
196 return safe_str(stdout.strip())
167 197
168 198 @reraise_safe_exceptions
169 199 def is_empty(self, wire):
170 200 repo_init = self._factory.repo_libgit2(wire)
171 201 with repo_init as repo:
172
173 202 try:
174 203 has_head = repo.head.name
175 204 if has_head:
@@ -186,20 +215,27 b' class GitRemote(RemoteBase):'
186 215 def assert_correct_path(self, wire):
187 216 cache_on, context_uid, repo_id = self._cache_on(wire)
188 217 region = self._region(wire)
218
189 219 @region.conditional_cache_on_arguments(condition=cache_on)
190 def _assert_correct_path(_context_uid, _repo_id):
191 try:
192 repo_init = self._factory.repo_libgit2(wire)
193 with repo_init as repo:
194 pass
195 except pygit2.GitError:
196 path = wire.get('path')
197 tb = traceback.format_exc()
198 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
220 def _assert_correct_path(_context_uid, _repo_id, fast_check):
221 if fast_check:
222 path = safe_str(wire['path'])
223 if pygit2.discover_repository(path):
224 return True
199 225 return False
226 else:
227 try:
228 repo_init = self._factory.repo_libgit2(wire)
229 with repo_init:
230 pass
231 except pygit2.GitError:
232 path = wire.get('path')
233 tb = traceback.format_exc()
234 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
235 return False
236 return True
200 237
201 return True
202 return _assert_correct_path(context_uid, repo_id)
238 return _assert_correct_path(context_uid, repo_id, True)
203 239
204 240 @reraise_safe_exceptions
205 241 def bare(self, wire):
@@ -208,17 +244,69 b' class GitRemote(RemoteBase):'
208 244 return repo.is_bare
209 245
210 246 @reraise_safe_exceptions
247 def get_node_data(self, wire, commit_id, path):
248 repo_init = self._factory.repo_libgit2(wire)
249 with repo_init as repo:
250 commit = repo[commit_id]
251 blob_obj = commit.tree[path]
252
253 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
254 raise exceptions.LookupException()(
255 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
256
257 return BytesEnvelope(blob_obj.data)
258
259 @reraise_safe_exceptions
260 def get_node_size(self, wire, commit_id, path):
261 repo_init = self._factory.repo_libgit2(wire)
262 with repo_init as repo:
263 commit = repo[commit_id]
264 blob_obj = commit.tree[path]
265
266 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
267 raise exceptions.LookupException()(
268 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
269
270 return blob_obj.size
271
272 @reraise_safe_exceptions
273 def get_node_flags(self, wire, commit_id, path):
274 repo_init = self._factory.repo_libgit2(wire)
275 with repo_init as repo:
276 commit = repo[commit_id]
277 blob_obj = commit.tree[path]
278
279 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
280 raise exceptions.LookupException()(
281 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
282
283 return blob_obj.filemode
284
285 @reraise_safe_exceptions
286 def get_node_is_binary(self, wire, commit_id, path):
287 repo_init = self._factory.repo_libgit2(wire)
288 with repo_init as repo:
289 commit = repo[commit_id]
290 blob_obj = commit.tree[path]
291
292 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
293 raise exceptions.LookupException()(
294 f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}')
295
296 return blob_obj.is_binary
297
298 @reraise_safe_exceptions
211 299 def blob_as_pretty_string(self, wire, sha):
212 300 repo_init = self._factory.repo_libgit2(wire)
213 301 with repo_init as repo:
214 302 blob_obj = repo[sha]
215 blob = blob_obj.data
216 return blob
303 return BytesEnvelope(blob_obj.data)
217 304
218 305 @reraise_safe_exceptions
219 306 def blob_raw_length(self, wire, sha):
220 307 cache_on, context_uid, repo_id = self._cache_on(wire)
221 308 region = self._region(wire)
309
222 310 @region.conditional_cache_on_arguments(condition=cache_on)
223 311 def _blob_raw_length(_repo_id, _sha):
224 312
@@ -230,10 +318,10 b' class GitRemote(RemoteBase):'
230 318 return _blob_raw_length(repo_id, sha)
231 319
232 320 def _parse_lfs_pointer(self, raw_content):
321 spec_string = b'version https://git-lfs.github.com/spec'
322 if raw_content and raw_content.startswith(spec_string):
233 323
234 spec_string = 'version https://git-lfs.github.com/spec'
235 if raw_content and raw_content.startswith(spec_string):
236 pattern = re.compile(r"""
324 pattern = re.compile(rb"""
237 325 (?:\n)?
238 326 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
239 327 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
@@ -249,8 +337,8 b' class GitRemote(RemoteBase):'
249 337 @reraise_safe_exceptions
250 338 def is_large_file(self, wire, commit_id):
251 339 cache_on, context_uid, repo_id = self._cache_on(wire)
340 region = self._region(wire)
252 341
253 region = self._region(wire)
254 342 @region.conditional_cache_on_arguments(condition=cache_on)
255 343 def _is_large_file(_repo_id, _sha):
256 344 repo_init = self._factory.repo_libgit2(wire)
@@ -266,8 +354,8 b' class GitRemote(RemoteBase):'
266 354 @reraise_safe_exceptions
267 355 def is_binary(self, wire, tree_id):
268 356 cache_on, context_uid, repo_id = self._cache_on(wire)
357 region = self._region(wire)
269 358
270 region = self._region(wire)
271 359 @region.conditional_cache_on_arguments(condition=cache_on)
272 360 def _is_binary(_repo_id, _tree_id):
273 361 repo_init = self._factory.repo_libgit2(wire)
@@ -278,6 +366,26 b' class GitRemote(RemoteBase):'
278 366 return _is_binary(repo_id, tree_id)
279 367
280 368 @reraise_safe_exceptions
369 def md5_hash(self, wire, commit_id, path):
370 cache_on, context_uid, repo_id = self._cache_on(wire)
371 region = self._region(wire)
372
373 @region.conditional_cache_on_arguments(condition=cache_on)
374 def _md5_hash(_repo_id, _commit_id, _path):
375 repo_init = self._factory.repo_libgit2(wire)
376 with repo_init as repo:
377 commit = repo[_commit_id]
378 blob_obj = commit.tree[_path]
379
380 if blob_obj.type != pygit2.GIT_OBJ_BLOB:
381 raise exceptions.LookupException()(
382 f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}')
383
384 return ''
385
386 return _md5_hash(repo_id, commit_id, path)
387
388 @reraise_safe_exceptions
281 389 def in_largefiles_store(self, wire, oid):
282 390 conf = self._wire_to_config(wire)
283 391 repo_init = self._factory.repo_libgit2(wire)
@@ -305,90 +413,104 b' class GitRemote(RemoteBase):'
305 413 store = LFSOidStore(
306 414 oid=oid, repo=repo_name, store_location=store_location)
307 415 return store.oid_path
308 raise ValueError('Unable to fetch oid with path {}'.format(oid))
416 raise ValueError(f'Unable to fetch oid with path {oid}')
309 417
310 418 @reraise_safe_exceptions
311 419 def bulk_request(self, wire, rev, pre_load):
312 420 cache_on, context_uid, repo_id = self._cache_on(wire)
313 421 region = self._region(wire)
422
314 423 @region.conditional_cache_on_arguments(condition=cache_on)
315 424 def _bulk_request(_repo_id, _rev, _pre_load):
316 425 result = {}
317 426 for attr in pre_load:
318 427 try:
319 428 method = self._bulk_methods[attr]
429 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
320 430 args = [wire, rev]
321 431 result[attr] = method(*args)
322 432 except KeyError as e:
323 raise exceptions.VcsException(e)(
324 "Unknown bulk attribute: %s" % attr)
433 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
325 434 return result
326 435
327 436 return _bulk_request(repo_id, rev, sorted(pre_load))
328 437
329 def _build_opener(self, url):
438 @reraise_safe_exceptions
439 def bulk_file_request(self, wire, commit_id, path, pre_load):
440 cache_on, context_uid, repo_id = self._cache_on(wire)
441 region = self._region(wire)
442
443 @region.conditional_cache_on_arguments(condition=cache_on)
444 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
445 result = {}
446 for attr in pre_load:
447 try:
448 method = self._bulk_file_methods[attr]
449 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
450 result[attr] = method(wire, _commit_id, _path)
451 except KeyError as e:
452 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
453 return result
454
455 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
456
457 def _build_opener(self, url: str):
330 458 handlers = []
331 url_obj = url_parser(url)
332 _, authinfo = url_obj.authinfo()
459 url_obj = url_parser(safe_bytes(url))
460 authinfo = url_obj.authinfo()[1]
333 461
334 462 if authinfo:
335 463 # create a password manager
336 passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
337 passmgr.add_password(*authinfo)
464 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
465 passmgr.add_password(*convert_to_str(authinfo))
338 466
339 467 handlers.extend((httpbasicauthhandler(passmgr),
340 468 httpdigestauthhandler(passmgr)))
341 469
342 return urllib2.build_opener(*handlers)
343
344 def _type_id_to_name(self, type_id):
345 return {
346 1: b'commit',
347 2: b'tree',
348 3: b'blob',
349 4: b'tag'
350 }[type_id]
470 return urllib.request.build_opener(*handlers)
351 471
352 472 @reraise_safe_exceptions
353 473 def check_url(self, url, config):
354 url_obj = url_parser(url)
355 test_uri, _ = url_obj.authinfo()
356 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
357 url_obj.query = obfuscate_qs(url_obj.query)
358 cleaned_uri = str(url_obj)
359 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
474 url_obj = url_parser(safe_bytes(url))
475
476 test_uri = safe_str(url_obj.authinfo()[0])
477 obfuscated_uri = get_obfuscated_url(url_obj)
478
479 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
360 480
361 481 if not test_uri.endswith('info/refs'):
362 482 test_uri = test_uri.rstrip('/') + '/info/refs'
363 483
364 o = self._build_opener(url)
484 o = self._build_opener(url=url)
365 485 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
366 486
367 487 q = {"service": 'git-upload-pack'}
368 qs = '?%s' % urllib.urlencode(q)
369 cu = "%s%s" % (test_uri, qs)
370 req = urllib2.Request(cu, None, {})
488 qs = f'?{urllib.parse.urlencode(q)}'
489 cu = f"{test_uri}{qs}"
371 490
372 491 try:
373 log.debug("Trying to open URL %s", cleaned_uri)
492 req = urllib.request.Request(cu, None, {})
493 log.debug("Trying to open URL %s", obfuscated_uri)
374 494 resp = o.open(req)
375 495 if resp.code != 200:
376 496 raise exceptions.URLError()('Return Code is not 200')
377 497 except Exception as e:
378 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
498 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
379 499 # means it cannot be cloned
380 raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
500 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
381 501
382 502 # now detect if it's proper git repo
383 gitdata = resp.read()
384 if 'service=git-upload-pack' in gitdata:
503 gitdata: bytes = resp.read()
504
505 if b'service=git-upload-pack' in gitdata:
385 506 pass
386 elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
387 # old style git can return some other format !
507 elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata):
508 # old style git can return some other format!
388 509 pass
389 510 else:
390 raise exceptions.URLError()(
391 "url [%s] does not look like an git" % (cleaned_uri,))
511 e = None
512 raise exceptions.URLError(e)(
513 f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")
392 514
393 515 return True
394 516
@@ -415,6 +537,7 b' class GitRemote(RemoteBase):'
415 537 def branch(self, wire, commit_id):
416 538 cache_on, context_uid, repo_id = self._cache_on(wire)
417 539 region = self._region(wire)
540
418 541 @region.conditional_cache_on_arguments(condition=cache_on)
419 542 def _branch(_context_uid, _repo_id, _commit_id):
420 543 regex = re.compile('^refs/heads')
@@ -422,7 +545,7 b' class GitRemote(RemoteBase):'
422 545 def filter_with(ref):
423 546 return regex.match(ref[0]) and ref[1] == _commit_id
424 547
425 branches = filter(filter_with, self.get_refs(wire).items())
548 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
426 549 return [x[0].split('refs/heads/')[-1] for x in branches]
427 550
428 551 return _branch(context_uid, repo_id, commit_id)
@@ -431,6 +554,7 b' class GitRemote(RemoteBase):'
431 554 def commit_branches(self, wire, commit_id):
432 555 cache_on, context_uid, repo_id = self._cache_on(wire)
433 556 region = self._region(wire)
557
434 558 @region.conditional_cache_on_arguments(condition=cache_on)
435 559 def _commit_branches(_context_uid, _repo_id, _commit_id):
436 560 repo_init = self._factory.repo_libgit2(wire)
@@ -449,152 +573,150 b' class GitRemote(RemoteBase):'
449 573 repo.object_store.add_object(blob)
450 574 return blob.id
451 575
452 # TODO: this is quite complex, check if that can be simplified
576 @reraise_safe_exceptions
577 def create_commit(self, wire, author, committer, message, branch, new_tree_id,
578 date_args: list[int, int] = None,
579 parents: list | None = None):
580
581 repo_init = self._factory.repo_libgit2(wire)
582 with repo_init as repo:
583
584 if date_args:
585 current_time, offset = date_args
586
587 kw = {
588 'time': current_time,
589 'offset': offset
590 }
591 author = create_signature_from_string(author, **kw)
592 committer = create_signature_from_string(committer, **kw)
593
594 tree = new_tree_id
595 if isinstance(tree, (bytes, str)):
596 # validate this tree is in the repo...
597 tree = repo[safe_str(tree)].id
598
599 if parents:
600 # run via sha's and validate them in repo
601 parents = [repo[c].id for c in parents]
602 else:
603 parents = []
604 # ensure we COMMIT on top of given branch head
605 # check if this repo has ANY branches, otherwise it's a new branch case we need to make
606 if branch in repo.branches.local:
607 parents += [repo.branches[branch].target]
608 elif [x for x in repo.branches.local]:
609 parents += [repo.head.target]
610 #else:
611 # in case we want to commit on new branch we create it on top of HEAD
612 #repo.branches.local.create(branch, repo.revparse_single('HEAD'))
613
614 # # Create a new commit
615 commit_oid = repo.create_commit(
616 f'refs/heads/{branch}', # the name of the reference to update
617 author, # the author of the commit
618 committer, # the committer of the commit
619 message, # the commit message
620 tree, # the tree produced by the index
621 parents # list of parents for the new commit, usually just one,
622 )
623
624 new_commit_id = safe_str(commit_oid)
625
626 return new_commit_id
627
453 628 @reraise_safe_exceptions
454 629 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
455 # Defines the root tree
456 class _Root(object):
457 def __repr__(self):
458 return 'ROOT TREE'
459 ROOT = _Root()
460 630
461 repo = self._factory.repo(wire)
462 object_store = repo.object_store
463
464 # Create tree and populates it with blobs
465
466 if commit_tree and repo[commit_tree]:
467 git_commit = repo[commit_data['parents'][0]]
468 commit_tree = repo[git_commit.tree] # root tree
469 else:
470 commit_tree = objects.Tree()
471
472 for node in updated:
473 # Compute subdirs if needed
474 dirpath, nodename = vcspath.split(node['path'])
475 dirnames = map(safe_str, dirpath and dirpath.split('/') or [])
476 parent = commit_tree
477 ancestors = [('', parent)]
631 def mode2pygit(mode):
632 """
633 git only supports two filemode 644 and 755
478 634
479 # Tries to dig for the deepest existing tree
480 while dirnames:
481 curdir = dirnames.pop(0)
482 try:
483 dir_id = parent[curdir][1]
484 except KeyError:
485 # put curdir back into dirnames and stops
486 dirnames.insert(0, curdir)
487 break
488 else:
489 # If found, updates parent
490 parent = repo[dir_id]
491 ancestors.append((curdir, parent))
492 # Now parent is deepest existing tree and we need to create
493 # subtrees for dirnames (in reverse order)
494 # [this only applies for nodes from added]
495 new_trees = []
635 0o100755 -> 33261
636 0o100644 -> 33188
637 """
638 return {
639 0o100644: pygit2.GIT_FILEMODE_BLOB,
640 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
641 0o120000: pygit2.GIT_FILEMODE_LINK
642 }.get(mode) or pygit2.GIT_FILEMODE_BLOB
496 643
497 blob = objects.Blob.from_string(node['content'])
644 repo_init = self._factory.repo_libgit2(wire)
645 with repo_init as repo:
646 repo_index = repo.index
498 647
499 if dirnames:
500 # If there are trees which should be created we need to build
501 # them now (in reverse order)
502 reversed_dirnames = list(reversed(dirnames))
503 curtree = objects.Tree()
504 curtree[node['node_path']] = node['mode'], blob.id
505 new_trees.append(curtree)
506 for dirname in reversed_dirnames[:-1]:
507 newtree = objects.Tree()
508 newtree[dirname] = (DIR_STAT, curtree.id)
509 new_trees.append(newtree)
510 curtree = newtree
511 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
512 else:
513 parent.add(name=node['node_path'], mode=node['mode'], hexsha=blob.id)
648 commit_parents = None
649 if commit_tree and commit_data['parents']:
650 commit_parents = commit_data['parents']
651 parent_commit = repo[commit_parents[0]]
652 repo_index.read_tree(parent_commit.tree)
514 653
515 new_trees.append(parent)
516 # Update ancestors
517 reversed_ancestors = reversed(
518 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
519 for parent, tree, path in reversed_ancestors:
520 parent[path] = (DIR_STAT, tree.id)
521 object_store.add_object(tree)
654 for pathspec in updated:
655 blob_id = repo.create_blob(pathspec['content'])
656 ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode']))
657 repo_index.add(ie)
658
659 for pathspec in removed:
660 repo_index.remove(pathspec)
522 661
523 object_store.add_object(blob)
524 for tree in new_trees:
525 object_store.add_object(tree)
662 # Write changes to the index
663 repo_index.write()
664
665 # Create a tree from the updated index
666 written_commit_tree = repo_index.write_tree()
667
668 new_tree_id = written_commit_tree
526 669
527 for node_path in removed:
528 paths = node_path.split('/')
529 tree = commit_tree # start with top-level
530 trees = [{'tree': tree, 'path': ROOT}]
531 # Traverse deep into the forest...
532 # resolve final tree by iterating the path.
533 # e.g a/b/c.txt will get
534 # - root as tree then
535 # - 'a' as tree,
536 # - 'b' as tree,
537 # - stop at c as blob.
538 for path in paths:
539 try:
540 obj = repo[tree[path][1]]
541 if isinstance(obj, objects.Tree):
542 trees.append({'tree': obj, 'path': path})
543 tree = obj
544 except KeyError:
545 break
546 #PROBLEM:
547 """
548 We're not editing same reference tree object
549 """
550 # Cut down the blob and all rotten trees on the way back...
551 for path, tree_data in reversed(zip(paths, trees)):
552 tree = tree_data['tree']
553 tree.__delitem__(path)
554 # This operation edits the tree, we need to mark new commit back
670 author = commit_data['author']
671 committer = commit_data['committer']
672 message = commit_data['message']
673
674 date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])]
555 675
556 if len(tree) > 0:
557 # This tree still has elements - don't remove it or any
558 # of it's parents
559 break
560
561 object_store.add_object(commit_tree)
676 new_commit_id = self.create_commit(wire, author, committer, message, branch,
677 new_tree_id, date_args=date_args, parents=commit_parents)
562 678
563 # Create commit
564 commit = objects.Commit()
565 commit.tree = commit_tree.id
566 for k, v in commit_data.items():
567 setattr(commit, k, v)
568 object_store.add_object(commit)
679 # libgit2, ensure the branch is there and exists
680 self.create_branch(wire, branch, new_commit_id)
569 681
570 self.create_branch(wire, branch, commit.id)
682 # libgit2, set new ref to this created commit
683 self.set_refs(wire, f'refs/heads/{branch}', new_commit_id)
571 684
572 # dulwich set-ref
573 ref = 'refs/heads/%s' % branch
574 repo.refs[ref] = commit.id
575
576 return commit.id
685 return new_commit_id
577 686
578 687 @reraise_safe_exceptions
579 688 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
580 689 if url != 'default' and '://' not in url:
581 690 client = LocalGitClient(url)
582 691 else:
583 url_obj = url_parser(url)
692 url_obj = url_parser(safe_bytes(url))
584 693 o = self._build_opener(url)
585 url, _ = url_obj.authinfo()
694 url = url_obj.authinfo()[0]
586 695 client = HttpGitClient(base_url=url, opener=o)
587 696 repo = self._factory.repo(wire)
588 697
589 698 determine_wants = repo.object_store.determine_wants_all
699
590 700 if refs:
591 def determine_wants_requested(references):
592 return [references[r] for r in references if r in refs]
701 refs: list[bytes] = [ascii_bytes(x) for x in refs]
702
703 def determine_wants_requested(_remote_refs):
704 determined = []
705 for ref_name, ref_hash in _remote_refs.items():
706 bytes_ref_name = safe_bytes(ref_name)
707
708 if bytes_ref_name in refs:
709 bytes_ref_hash = safe_bytes(ref_hash)
710 determined.append(bytes_ref_hash)
711 return determined
712
713 # swap with our custom requested wants
593 714 determine_wants = determine_wants_requested
594 715
595 716 try:
596 717 remote_refs = client.fetch(
597 718 path=url, target=repo, determine_wants=determine_wants)
719
598 720 except NotGitRepository as e:
599 721 log.warning(
600 722 'Trying to fetch from "%s" failed, not a Git repository.', url)
@@ -618,19 +740,27 b' class GitRemote(RemoteBase):'
618 740 repo[k] = remote_refs[k]
619 741
620 742 if refs and not update_after:
743 # update to ref
621 744 # mikhail: explicitly set the head to the last ref.
622 repo["HEAD"] = remote_refs[refs[-1]]
745 update_to_ref = refs[-1]
746 if isinstance(update_after, str):
747 update_to_ref = update_after
748
749 repo[HEAD_MARKER] = remote_refs[update_to_ref]
623 750
624 751 if update_after:
625 # we want to checkout HEAD
626 repo["HEAD"] = remote_refs["HEAD"]
752 # we want to check out HEAD
753 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
627 754 index.build_index_from_tree(repo.path, repo.index_path(),
628 repo.object_store, repo["HEAD"].tree)
755 repo.object_store, repo[HEAD_MARKER].tree)
756
757 if isinstance(remote_refs, FetchPackResult):
758 return remote_refs.refs
629 759 return remote_refs
630 760
631 761 @reraise_safe_exceptions
632 def sync_fetch(self, wire, url, refs=None, all_refs=False):
633 repo = self._factory.repo(wire)
762 def sync_fetch(self, wire, url, refs=None, all_refs=False, **kwargs):
763 self._factory.repo(wire)
634 764 if refs and not isinstance(refs, (list, tuple)):
635 765 refs = [refs]
636 766
@@ -649,7 +779,7 b' class GitRemote(RemoteBase):'
649 779 fetch_refs = []
650 780
651 781 for ref_line in output.splitlines():
652 sha, ref = ref_line.split('\t')
782 sha, ref = ref_line.split(b'\t')
653 783 sha = sha.strip()
654 784 if ref in remote_refs:
655 785 # duplicate, skip
@@ -658,32 +788,38 b' class GitRemote(RemoteBase):'
658 788 log.debug("Skipping peeled reference %s", ref)
659 789 continue
660 790 # don't sync HEAD
661 if ref in ['HEAD']:
791 if ref in [HEAD_MARKER]:
662 792 continue
663 793
664 794 remote_refs[ref] = sha
665 795
666 796 if refs and sha in refs:
667 797 # we filter fetch using our specified refs
668 fetch_refs.append('{}:{}'.format(ref, ref))
798 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
669 799 elif not refs:
670 fetch_refs.append('{}:{}'.format(ref, ref))
800 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
671 801 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
672 802
673 803 if fetch_refs:
674 for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
804 for chunk in more_itertools.chunked(fetch_refs, 128):
675 805 fetch_refs_chunks = list(chunk)
676 806 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
677 _out, _err = self.run_git_command(
807 self.run_git_command(
678 808 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
679 809 fail_on_stderr=False,
680 810 _copts=self._remote_conf(config),
681 811 extra_env={'GIT_TERMINAL_PROMPT': '0'})
812 if kwargs.get('sync_large_objects'):
813 self.run_git_command(
814 wire, ['lfs', 'fetch', url, '--all'],
815 fail_on_stderr=False,
816 _copts=self._remote_conf(config),
817 )
682 818
683 819 return remote_refs
684 820
685 821 @reraise_safe_exceptions
686 def sync_push(self, wire, url, refs=None):
822 def sync_push(self, wire, url, refs=None, **kwargs):
687 823 if not self.check_url(url, wire):
688 824 return
689 825 config = self._wire_to_config(wire)
@@ -692,6 +828,12 b' class GitRemote(RemoteBase):'
692 828 wire, ['push', url, '--mirror'], fail_on_stderr=False,
693 829 _copts=self._remote_conf(config),
694 830 extra_env={'GIT_TERMINAL_PROMPT': '0'})
831 if kwargs.get('sync_large_objects'):
832 self.run_git_command(
833 wire, ['lfs', 'push', url, '--all'],
834 fail_on_stderr=False,
835 _copts=self._remote_conf(config),
836 )
695 837
696 838 @reraise_safe_exceptions
697 839 def get_remote_refs(self, wire, url):
@@ -704,24 +846,29 b' class GitRemote(RemoteBase):'
704 846 return repo.get_description()
705 847
706 848 @reraise_safe_exceptions
707 def get_missing_revs(self, wire, rev1, rev2, path2):
849 def get_missing_revs(self, wire, rev1, rev2, other_repo_path):
850 origin_repo_path = wire['path']
708 851 repo = self._factory.repo(wire)
709 LocalGitClient(thin_packs=False).fetch(path2, repo)
852 # fetch from other_repo_path to our origin repo
853 LocalGitClient(thin_packs=False).fetch(other_repo_path, repo)
710 854
711 855 wire_remote = wire.copy()
712 wire_remote['path'] = path2
856 wire_remote['path'] = other_repo_path
713 857 repo_remote = self._factory.repo(wire_remote)
714 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
858
859 # fetch from origin_repo_path to our remote repo
860 LocalGitClient(thin_packs=False).fetch(origin_repo_path, repo_remote)
715 861
716 862 revs = [
717 863 x.commit.id
718 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
864 for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])]
719 865 return revs
720 866
721 867 @reraise_safe_exceptions
722 868 def get_object(self, wire, sha, maybe_unreachable=False):
723 869 cache_on, context_uid, repo_id = self._cache_on(wire)
724 870 region = self._region(wire)
871
725 872 @region.conditional_cache_on_arguments(condition=cache_on)
726 873 def _get_object(_context_uid, _repo_id, _sha):
727 874 repo_init = self._factory.repo_libgit2(wire)
@@ -766,11 +913,11 b' class GitRemote(RemoteBase):'
766 913 raise exceptions.LookupException(e)(missing_commit_err)
767 914
768 915 commit_id = commit.hex
769 type_id = commit.type
916 type_str = commit.type_str
770 917
771 918 return {
772 919 'id': commit_id,
773 'type': self._type_id_to_name(type_id),
920 'type': type_str,
774 921 'commit_id': commit_id,
775 922 'idx': 0
776 923 }
@@ -781,6 +928,7 b' class GitRemote(RemoteBase):'
781 928 def get_refs(self, wire):
782 929 cache_on, context_uid, repo_id = self._cache_on(wire)
783 930 region = self._region(wire)
931
784 932 @region.conditional_cache_on_arguments(condition=cache_on)
785 933 def _get_refs(_context_uid, _repo_id):
786 934
@@ -788,7 +936,7 b' class GitRemote(RemoteBase):'
788 936 with repo_init as repo:
789 937 regex = re.compile('^refs/(heads|tags)/')
790 938 return {x.name: x.target.hex for x in
791 filter(lambda ref: regex.match(ref.name) ,repo.listall_reference_objects())}
939 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
792 940
793 941 return _get_refs(context_uid, repo_id)
794 942
@@ -796,13 +944,14 b' class GitRemote(RemoteBase):'
796 944 def get_branch_pointers(self, wire):
797 945 cache_on, context_uid, repo_id = self._cache_on(wire)
798 946 region = self._region(wire)
947
799 948 @region.conditional_cache_on_arguments(condition=cache_on)
800 949 def _get_branch_pointers(_context_uid, _repo_id):
801 950
802 951 repo_init = self._factory.repo_libgit2(wire)
803 952 regex = re.compile('^refs/heads')
804 953 with repo_init as repo:
805 branches = filter(lambda ref: regex.match(ref.name), repo.listall_reference_objects())
954 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
806 955 return {x.target.hex: x.shorthand for x in branches}
807 956
808 957 return _get_branch_pointers(context_uid, repo_id)
@@ -811,6 +960,7 b' class GitRemote(RemoteBase):'
811 960 def head(self, wire, show_exc=True):
812 961 cache_on, context_uid, repo_id = self._cache_on(wire)
813 962 region = self._region(wire)
963
814 964 @region.conditional_cache_on_arguments(condition=cache_on)
815 965 def _head(_context_uid, _repo_id, _show_exc):
816 966 repo_init = self._factory.repo_libgit2(wire)
@@ -824,19 +974,22 b' class GitRemote(RemoteBase):'
824 974
825 975 @reraise_safe_exceptions
826 976 def init(self, wire):
827 repo_path = str_to_dulwich(wire['path'])
828 self.repo = Repo.init(repo_path)
977 repo_path = safe_str(wire['path'])
978 os.makedirs(repo_path, mode=0o755)
979 pygit2.init_repository(repo_path, bare=False)
829 980
830 981 @reraise_safe_exceptions
831 982 def init_bare(self, wire):
832 repo_path = str_to_dulwich(wire['path'])
833 self.repo = Repo.init_bare(repo_path)
983 repo_path = safe_str(wire['path'])
984 os.makedirs(repo_path, mode=0o755)
985 pygit2.init_repository(repo_path, bare=True)
834 986
835 987 @reraise_safe_exceptions
836 988 def revision(self, wire, rev):
837 989
838 990 cache_on, context_uid, repo_id = self._cache_on(wire)
839 991 region = self._region(wire)
992
840 993 @region.conditional_cache_on_arguments(condition=cache_on)
841 994 def _revision(_context_uid, _repo_id, _rev):
842 995 repo_init = self._factory.repo_libgit2(wire)
@@ -856,6 +1009,7 b' class GitRemote(RemoteBase):'
856 1009 def date(self, wire, commit_id):
857 1010 cache_on, context_uid, repo_id = self._cache_on(wire)
858 1011 region = self._region(wire)
1012
859 1013 @region.conditional_cache_on_arguments(condition=cache_on)
860 1014 def _date(_repo_id, _commit_id):
861 1015 repo_init = self._factory.repo_libgit2(wire)
@@ -876,6 +1030,7 b' class GitRemote(RemoteBase):'
876 1030 def author(self, wire, commit_id):
877 1031 cache_on, context_uid, repo_id = self._cache_on(wire)
878 1032 region = self._region(wire)
1033
879 1034 @region.conditional_cache_on_arguments(condition=cache_on)
880 1035 def _author(_repo_id, _commit_id):
881 1036 repo_init = self._factory.repo_libgit2(wire)
@@ -888,12 +1043,12 b' class GitRemote(RemoteBase):'
888 1043 author = commit.get_object().author
889 1044
890 1045 if author.email:
891 return u"{} <{}>".format(author.name, author.email)
1046 return f"{author.name} <{author.email}>"
892 1047
893 1048 try:
894 return u"{}".format(author.name)
1049 return f"{author.name}"
895 1050 except Exception:
896 return u"{}".format(safe_unicode(author.raw_name))
1051 return f"{safe_str(author.raw_name)}"
897 1052
898 1053 return _author(repo_id, commit_id)
899 1054
@@ -901,6 +1056,7 b' class GitRemote(RemoteBase):'
901 1056 def message(self, wire, commit_id):
902 1057 cache_on, context_uid, repo_id = self._cache_on(wire)
903 1058 region = self._region(wire)
1059
904 1060 @region.conditional_cache_on_arguments(condition=cache_on)
905 1061 def _message(_repo_id, _commit_id):
906 1062 repo_init = self._factory.repo_libgit2(wire)
@@ -913,6 +1069,7 b' class GitRemote(RemoteBase):'
913 1069 def parents(self, wire, commit_id):
914 1070 cache_on, context_uid, repo_id = self._cache_on(wire)
915 1071 region = self._region(wire)
1072
916 1073 @region.conditional_cache_on_arguments(condition=cache_on)
917 1074 def _parents(_repo_id, _commit_id):
918 1075 repo_init = self._factory.repo_libgit2(wire)
@@ -930,17 +1087,23 b' class GitRemote(RemoteBase):'
930 1087 def children(self, wire, commit_id):
931 1088 cache_on, context_uid, repo_id = self._cache_on(wire)
932 1089 region = self._region(wire)
1090
1091 head = self.head(wire)
1092
933 1093 @region.conditional_cache_on_arguments(condition=cache_on)
934 1094 def _children(_repo_id, _commit_id):
1095
935 1096 output, __ = self.run_git_command(
936 wire, ['rev-list', '--all', '--children'])
1097 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
937 1098
938 1099 child_ids = []
939 pat = re.compile(r'^%s' % commit_id)
940 for l in output.splitlines():
941 if pat.match(l):
942 found_ids = l.split(' ')[1:]
1100 pat = re.compile(fr'^{commit_id}')
1101 for line in output.splitlines():
1102 line = safe_str(line)
1103 if pat.match(line):
1104 found_ids = line.split(' ')[1:]
943 1105 child_ids.extend(found_ids)
1106 break
944 1107
945 1108 return child_ids
946 1109 return _children(repo_id, commit_id)
@@ -952,10 +1115,22 b' class GitRemote(RemoteBase):'
952 1115 repo.references.create(key, value, force=True)
953 1116
954 1117 @reraise_safe_exceptions
1118 def update_refs(self, wire, key, value):
1119 repo_init = self._factory.repo_libgit2(wire)
1120 with repo_init as repo:
1121 if key not in repo.references:
1122 raise ValueError(f'Reference {key} not found in the repository')
1123 repo.references.create(key, value, force=True)
1124
1125 @reraise_safe_exceptions
955 1126 def create_branch(self, wire, branch_name, commit_id, force=False):
956 1127 repo_init = self._factory.repo_libgit2(wire)
957 1128 with repo_init as repo:
958 commit = repo[commit_id]
1129 if commit_id:
1130 commit = repo[commit_id]
1131 else:
1132 # if commit is not given just use the HEAD
1133 commit = repo.head()
959 1134
960 1135 if force:
961 1136 repo.branches.local.create(branch_name, commit, force=force)
@@ -973,23 +1148,39 b' class GitRemote(RemoteBase):'
973 1148 def tag_remove(self, wire, tag_name):
974 1149 repo_init = self._factory.repo_libgit2(wire)
975 1150 with repo_init as repo:
976 key = 'refs/tags/{}'.format(tag_name)
1151 key = f'refs/tags/{tag_name}'
977 1152 repo.references.delete(key)
978 1153
979 1154 @reraise_safe_exceptions
980 1155 def tree_changes(self, wire, source_id, target_id):
981 # TODO(marcink): remove this seems it's only used by tests
982 1156 repo = self._factory.repo(wire)
1157 # source can be empty
1158 source_id = safe_bytes(source_id if source_id else b'')
1159 target_id = safe_bytes(target_id)
1160
983 1161 source = repo[source_id].tree if source_id else None
984 1162 target = repo[target_id].tree
985 1163 result = repo.object_store.tree_changes(source, target)
986 return list(result)
1164
1165 added = set()
1166 modified = set()
1167 deleted = set()
1168 for (old_path, new_path), (_, _), (_, _) in list(result):
1169 if new_path and old_path:
1170 modified.add(new_path)
1171 elif new_path and not old_path:
1172 added.add(new_path)
1173 elif not new_path and old_path:
1174 deleted.add(old_path)
1175
1176 return list(added), list(modified), list(deleted)
987 1177
988 1178 @reraise_safe_exceptions
989 1179 def tree_and_type_for_path(self, wire, commit_id, path):
990 1180
991 1181 cache_on, context_uid, repo_id = self._cache_on(wire)
992 1182 region = self._region(wire)
1183
993 1184 @region.conditional_cache_on_arguments(condition=cache_on)
994 1185 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
995 1186 repo_init = self._factory.repo_libgit2(wire)
@@ -1001,13 +1192,14 b' class GitRemote(RemoteBase):'
1001 1192 except KeyError:
1002 1193 return None, None, None
1003 1194
1004 return tree.id.hex, tree.type, tree.filemode
1195 return tree.id.hex, tree.type_str, tree.filemode
1005 1196 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1006 1197
1007 1198 @reraise_safe_exceptions
1008 1199 def tree_items(self, wire, tree_id):
1009 1200 cache_on, context_uid, repo_id = self._cache_on(wire)
1010 1201 region = self._region(wire)
1202
1011 1203 @region.conditional_cache_on_arguments(condition=cache_on)
1012 1204 def _tree_items(_repo_id, _tree_id):
1013 1205
@@ -1016,13 +1208,13 b' class GitRemote(RemoteBase):'
1016 1208 try:
1017 1209 tree = repo[tree_id]
1018 1210 except KeyError:
1019 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1211 raise ObjectMissing(f'No tree with id: {tree_id}')
1020 1212
1021 1213 result = []
1022 1214 for item in tree:
1023 1215 item_sha = item.hex
1024 1216 item_mode = item.filemode
1025 item_type = item.type
1217 item_type = item.type_str
1026 1218
1027 1219 if item_type == 'commit':
1028 1220 # NOTE(marcink): submodules we translate to 'link' for backward compat
@@ -1039,7 +1231,7 b' class GitRemote(RemoteBase):'
1039 1231 """
1040 1232
1041 1233 flags = [
1042 '-U%s' % context, '--patch',
1234 f'-U{context}', '--patch',
1043 1235 '--binary',
1044 1236 '--find-renames',
1045 1237 '--no-indent-heuristic',
@@ -1066,7 +1258,7 b' class GitRemote(RemoteBase):'
1066 1258 lines = diff.splitlines()
1067 1259 x = 0
1068 1260 for line in lines:
1069 if line.startswith('diff'):
1261 if line.startswith(b'diff'):
1070 1262 break
1071 1263 x += 1
1072 1264 # Append new line just like 'diff' command do
@@ -1076,6 +1268,7 b' class GitRemote(RemoteBase):'
1076 1268 @reraise_safe_exceptions
1077 1269 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1078 1270 repo_init = self._factory.repo_libgit2(wire)
1271
1079 1272 with repo_init as repo:
1080 1273 swap = True
1081 1274 flags = 0
@@ -1101,15 +1294,17 b' class GitRemote(RemoteBase):'
1101 1294 if file_filter:
1102 1295 for p in diff_obj:
1103 1296 if p.delta.old_file.path == file_filter:
1104 return p.patch or ''
1297 return BytesEnvelope(p.data) or BytesEnvelope(b'')
1105 1298 # fo matching path == no diff
1106 return ''
1107 return diff_obj.patch or ''
1299 return BytesEnvelope(b'')
1300
1301 return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'')
1108 1302
1109 1303 @reraise_safe_exceptions
1110 1304 def node_history(self, wire, commit_id, path, limit):
1111 1305 cache_on, context_uid, repo_id = self._cache_on(wire)
1112 1306 region = self._region(wire)
1307
1113 1308 @region.conditional_cache_on_arguments(condition=cache_on)
1114 1309 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1115 1310 # optimize for n==1, rev-list is much faster for that use-case
@@ -1122,14 +1317,14 b' class GitRemote(RemoteBase):'
1122 1317 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1123 1318
1124 1319 output, __ = self.run_git_command(wire, cmd)
1125 commit_ids = re.findall(r'[0-9a-fA-F]{40}', output)
1320 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1126 1321
1127 1322 return [x for x in commit_ids]
1128 1323 return _node_history(context_uid, repo_id, commit_id, path, limit)
1129 1324
1130 1325 @reraise_safe_exceptions
1131 def node_annotate(self, wire, commit_id, path):
1132
1326 def node_annotate_legacy(self, wire, commit_id, path):
1327 # note: replaced by pygit2 implementation
1133 1328 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1134 1329 # -l ==> outputs long shas (and we need all 40 characters)
1135 1330 # --root ==> doesn't put '^' character for boundaries
@@ -1137,22 +1332,44 b' class GitRemote(RemoteBase):'
1137 1332 output, __ = self.run_git_command(wire, cmd)
1138 1333
1139 1334 result = []
1140 for i, blame_line in enumerate(output.split('\n')[:-1]):
1335 for i, blame_line in enumerate(output.splitlines()[:-1]):
1141 1336 line_no = i + 1
1142 commit_id, line = re.split(r' ', blame_line, 1)
1143 result.append((line_no, commit_id, line))
1337 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1338 result.append((line_no, blame_commit_id, line))
1339
1144 1340 return result
1145 1341
1146 1342 @reraise_safe_exceptions
1147 def update_server_info(self, wire):
1148 repo = self._factory.repo(wire)
1149 update_server_info(repo)
1343 def node_annotate(self, wire, commit_id, path):
1344
1345 result_libgit = []
1346 repo_init = self._factory.repo_libgit2(wire)
1347 with repo_init as repo:
1348 commit = repo[commit_id]
1349 blame_obj = repo.blame(path, newest_commit=commit_id)
1350 for i, line in enumerate(commit.tree[path].data.splitlines()):
1351 line_no = i + 1
1352 hunk = blame_obj.for_line(line_no)
1353 blame_commit_id = hunk.final_commit_id.hex
1354
1355 result_libgit.append((line_no, blame_commit_id, line))
1356
1357 return BinaryEnvelope(result_libgit)
1358
1359 @reraise_safe_exceptions
1360 def update_server_info(self, wire, force=False):
1361 cmd = ['update-server-info']
1362 if force:
1363 cmd += ['--force']
1364 output, __ = self.run_git_command(wire, cmd)
1365 return output.splitlines()
1150 1366
1151 1367 @reraise_safe_exceptions
1152 1368 def get_all_commit_ids(self, wire):
1153 1369
1154 1370 cache_on, context_uid, repo_id = self._cache_on(wire)
1155 1371 region = self._region(wire)
1372
1156 1373 @region.conditional_cache_on_arguments(condition=cache_on)
1157 1374 def _get_all_commit_ids(_context_uid, _repo_id):
1158 1375
@@ -1163,11 +1380,22 b' class GitRemote(RemoteBase):'
1163 1380 except Exception:
1164 1381 # Can be raised for empty repositories
1165 1382 return []
1383
1384 @region.conditional_cache_on_arguments(condition=cache_on)
1385 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1386 repo_init = self._factory.repo_libgit2(wire)
1387 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1388 results = []
1389 with repo_init as repo:
1390 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1391 results.append(commit.id.hex)
1392
1166 1393 return _get_all_commit_ids(context_uid, repo_id)
1167 1394
1168 1395 @reraise_safe_exceptions
1169 1396 def run_git_command(self, wire, cmd, **opts):
1170 1397 path = wire.get('path', None)
1398 debug_mode = rhodecode.ConfigGet().get_bool('debug')
1171 1399
1172 1400 if path and os.path.isdir(path):
1173 1401 opts['cwd'] = path
@@ -1176,7 +1404,7 b' class GitRemote(RemoteBase):'
1176 1404 _copts = []
1177 1405 del opts['_bare']
1178 1406 else:
1179 _copts = ['-c', 'core.quotepath=false', ]
1407 _copts = ['-c', 'core.quotepath=false', '-c', 'advice.diverging=false']
1180 1408 safe_call = False
1181 1409 if '_safe' in opts:
1182 1410 # no exc on failure
@@ -1203,13 +1431,17 b' class GitRemote(RemoteBase):'
1203 1431 _opts.update(opts)
1204 1432 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1205 1433
1206 return ''.join(proc), ''.join(proc.error)
1207 except (EnvironmentError, OSError) as err:
1208 cmd = ' '.join(cmd) # human friendly CMD
1209 tb_err = ("Couldn't run git command (%s).\n"
1210 "Original error was:%s\n"
1211 "Call options:%s\n"
1212 % (cmd, err, _opts))
1434 return b''.join(proc), b''.join(proc.stderr)
1435 except OSError as err:
1436 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1437 call_opts = {}
1438 if debug_mode:
1439 call_opts = _opts
1440
1441 tb_err = ("Couldn't run git command ({}).\n"
1442 "Original error was:{}\n"
1443 "Call options:{}\n"
1444 .format(cmd, err, call_opts))
1213 1445 log.exception(tb_err)
1214 1446 if safe_call:
1215 1447 return '', err
@@ -1224,6 +1456,9 b' class GitRemote(RemoteBase):'
1224 1456 from vcsserver.hook_utils import install_git_hooks
1225 1457 bare = self.bare(wire)
1226 1458 path = wire['path']
1459 binary_dir = settings.BINARY_DIR
1460 if binary_dir:
1461 os.path.join(binary_dir, 'python3')
1227 1462 return install_git_hooks(path, bare, force_create=force)
1228 1463
1229 1464 @reraise_safe_exceptions
@@ -1240,13 +1475,15 b' class GitRemote(RemoteBase):'
1240 1475 @reraise_safe_exceptions
1241 1476 def set_head_ref(self, wire, head_name):
1242 1477 log.debug('Setting refs/head to `%s`', head_name)
1243 cmd = ['symbolic-ref', 'HEAD', 'refs/heads/%s' % head_name]
1244 output, __ = self.run_git_command(wire, cmd)
1245 return [head_name] + output.splitlines()
1478 repo_init = self._factory.repo_libgit2(wire)
1479 with repo_init as repo:
1480 repo.set_head(f'refs/heads/{head_name}')
1481
1482 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1246 1483
1247 1484 @reraise_safe_exceptions
1248 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1249 archive_dir_name, commit_id):
1485 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1486 archive_dir_name, commit_id, cache_config):
1250 1487
1251 1488 def file_walker(_commit_id, path):
1252 1489 repo_init = self._factory.repo_libgit2(wire)
@@ -1262,20 +1499,20 b' class GitRemote(RemoteBase):'
1262 1499 try:
1263 1500 tree = repo[tree_id]
1264 1501 except KeyError:
1265 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1502 raise ObjectMissing(f'No tree with id: {tree_id}')
1266 1503
1267 1504 index = LibGit2Index.Index()
1268 1505 index.read_tree(tree)
1269 1506 file_iter = index
1270 1507
1271 for fn in file_iter:
1272 file_path = fn.path
1273 mode = fn.mode
1508 for file_node in file_iter:
1509 file_path = file_node.path
1510 mode = file_node.mode
1274 1511 is_link = stat.S_ISLNK(mode)
1275 1512 if mode == pygit2.GIT_FILEMODE_COMMIT:
1276 1513 log.debug('Skipping path %s as a commit node', file_path)
1277 1514 continue
1278 yield ArchiveNode(file_path, mode, is_link, repo[fn.hex].read_raw)
1515 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1279 1516
1280 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1281 archive_dir_name, commit_id)
1517 return store_archive_in_cache(
1518 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
This diff has been collapsed as it changes many lines, (516 lines changed) Show them Hide them
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -14,33 +14,73 b''
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 import functools
17
18 import binascii
18 19 import io
19 20 import logging
20 import os
21 21 import stat
22 import urllib
23 import urllib2
24 import traceback
22 import sys
23 import urllib.request
24 import urllib.parse
25 import hashlib
25 26
26 from hgext import largefiles, rebase, purge
27 from hgext.strip import strip as hgext_strip
27 from hgext import largefiles, rebase
28
28 29 from mercurial import commands
29 30 from mercurial import unionrepo
30 31 from mercurial import verify
31 32 from mercurial import repair
33 from mercurial.error import AmbiguousPrefixLookupError
32 34
33 35 import vcsserver
34 36 from vcsserver import exceptions
35 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, archive_repo, ArchiveNode
37 from vcsserver.base import (
38 RepoFactory,
39 obfuscate_qs,
40 raise_from_original,
41 store_archive_in_cache,
42 ArchiveNode,
43 BytesEnvelope,
44 BinaryEnvelope,
45 )
36 46 from vcsserver.hgcompat import (
37 archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx,
38 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler,
39 makepeer, instance, match, memctx, exchange, memfilectx, nullrev, hg_merge,
40 patch, peer, revrange, ui, hg_tag, Abort, LookupError, RepoError,
41 RepoLookupError, InterventionRequired, RequirementError,
42 alwaysmatcher, patternmatcher, hgutil)
47 archival,
48 bin,
49 clone,
50 config as hgconfig,
51 diffopts,
52 hex,
53 get_ctx,
54 hg_url as url_parser,
55 httpbasicauthhandler,
56 httpdigestauthhandler,
57 makepeer,
58 instance,
59 match,
60 memctx,
61 exchange,
62 memfilectx,
63 nullrev,
64 hg_merge,
65 patch,
66 peer,
67 revrange,
68 ui,
69 hg_tag,
70 Abort,
71 LookupError,
72 RepoError,
73 RepoLookupError,
74 InterventionRequired,
75 RequirementError,
76 alwaysmatcher,
77 patternmatcher,
78 hgext_strip,
79 )
80 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes, convert_to_str
43 81 from vcsserver.vcs_base import RemoteBase
82 from vcsserver.config import hooks as hooks_config
83 from vcsserver.lib.exc_tracking import format_exc
44 84
45 85 log = logging.getLogger(__name__)
46 86
@@ -48,25 +88,31 b' log = logging.getLogger(__name__)'
48 88 def make_ui_from_config(repo_config):
49 89
50 90 class LoggingUI(ui.ui):
91
51 92 def status(self, *msg, **opts):
52 log.info(' '.join(msg).rstrip('\n'))
53 super(LoggingUI, self).status(*msg, **opts)
93 str_msg = map(safe_str, msg)
94 log.info(' '.join(str_msg).rstrip('\n'))
95 #super(LoggingUI, self).status(*msg, **opts)
54 96
55 97 def warn(self, *msg, **opts):
56 log.warn(' '.join(msg).rstrip('\n'))
57 super(LoggingUI, self).warn(*msg, **opts)
98 str_msg = map(safe_str, msg)
99 log.warning('ui_logger:'+' '.join(str_msg).rstrip('\n'))
100 #super(LoggingUI, self).warn(*msg, **opts)
58 101
59 102 def error(self, *msg, **opts):
60 log.error(' '.join(msg).rstrip('\n'))
61 super(LoggingUI, self).error(*msg, **opts)
103 str_msg = map(safe_str, msg)
104 log.error('ui_logger:'+' '.join(str_msg).rstrip('\n'))
105 #super(LoggingUI, self).error(*msg, **opts)
62 106
63 107 def note(self, *msg, **opts):
64 log.info(' '.join(msg).rstrip('\n'))
65 super(LoggingUI, self).note(*msg, **opts)
108 str_msg = map(safe_str, msg)
109 log.info('ui_logger:'+' '.join(str_msg).rstrip('\n'))
110 #super(LoggingUI, self).note(*msg, **opts)
66 111
67 112 def debug(self, *msg, **opts):
68 log.debug(' '.join(msg).rstrip('\n'))
69 super(LoggingUI, self).debug(*msg, **opts)
113 str_msg = map(safe_str, msg)
114 log.debug('ui_logger:'+' '.join(str_msg).rstrip('\n'))
115 #super(LoggingUI, self).debug(*msg, **opts)
70 116
71 117 baseui = LoggingUI()
72 118
@@ -76,26 +122,26 b' def make_ui_from_config(repo_config):'
76 122 baseui._tcfg = hgconfig.config()
77 123
78 124 for section, option, value in repo_config:
79 baseui.setconfig(section, option, value)
125 baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value))
80 126
81 127 # make our hgweb quiet so it doesn't print output
82 baseui.setconfig('ui', 'quiet', 'true')
128 baseui.setconfig(b'ui', b'quiet', b'true')
83 129
84 baseui.setconfig('ui', 'paginate', 'never')
130 baseui.setconfig(b'ui', b'paginate', b'never')
85 131 # for better Error reporting of Mercurial
86 baseui.setconfig('ui', 'message-output', 'stderr')
132 baseui.setconfig(b'ui', b'message-output', b'stderr')
87 133
88 134 # force mercurial to only use 1 thread, otherwise it may try to set a
89 135 # signal in a non-main thread, thus generating a ValueError.
90 baseui.setconfig('worker', 'numcpus', 1)
136 baseui.setconfig(b'worker', b'numcpus', 1)
91 137
92 138 # If there is no config for the largefiles extension, we explicitly disable
93 139 # it here. This overrides settings from repositories hgrc file. Recent
94 140 # mercurial versions enable largefiles in hgrc on clone from largefile
95 141 # repo.
96 if not baseui.hasconfig('extensions', 'largefiles'):
142 if not baseui.hasconfig(b'extensions', b'largefiles'):
97 143 log.debug('Explicitly disable largefiles extension for repo.')
98 baseui.setconfig('extensions', 'largefiles', '!')
144 baseui.setconfig(b'extensions', b'largefiles', b'!')
99 145
100 146 return baseui
101 147
@@ -107,19 +153,19 b' def reraise_safe_exceptions(func):'
107 153 try:
108 154 return func(*args, **kwargs)
109 155 except (Abort, InterventionRequired) as e:
110 raise_from_original(exceptions.AbortException(e))
156 raise_from_original(exceptions.AbortException(e), e)
111 157 except RepoLookupError as e:
112 raise_from_original(exceptions.LookupException(e))
158 raise_from_original(exceptions.LookupException(e), e)
113 159 except RequirementError as e:
114 raise_from_original(exceptions.RequirementException(e))
160 raise_from_original(exceptions.RequirementException(e), e)
115 161 except RepoError as e:
116 raise_from_original(exceptions.VcsException(e))
162 raise_from_original(exceptions.VcsException(e), e)
117 163 except LookupError as e:
118 raise_from_original(exceptions.LookupException(e))
164 raise_from_original(exceptions.LookupException(e), e)
119 165 except Exception as e:
120 166 if not hasattr(e, '_vcs_kind'):
121 167 log.exception("Unhandled exception in hg remote call")
122 raise_from_original(exceptions.UnhandledException(e))
168 raise_from_original(exceptions.UnhandledException(e), e)
123 169
124 170 raise
125 171 return wrapper
@@ -130,9 +176,18 b' class MercurialFactory(RepoFactory):'
130 176
131 177 def _create_config(self, config, hooks=True):
132 178 if not hooks:
133 hooks_to_clean = frozenset((
134 'changegroup.repo_size', 'preoutgoing.pre_pull',
135 'outgoing.pull_logger', 'prechangegroup.pre_push'))
179
180 hooks_to_clean = {
181
182 hooks_config.HOOK_REPO_SIZE,
183 hooks_config.HOOK_PRE_PULL,
184 hooks_config.HOOK_PULL,
185
186 hooks_config.HOOK_PRE_PUSH,
187 # TODO: what about PRETXT, this was disabled in pre 5.0.0
188 hooks_config.HOOK_PRETX_PUSH,
189
190 }
136 191 new_config = []
137 192 for section, option, value in config:
138 193 if section == 'hooks' and option in hooks_to_clean:
@@ -145,7 +200,9 b' class MercurialFactory(RepoFactory):'
145 200
146 201 def _create_repo(self, wire, create):
147 202 baseui = self._create_config(wire["config"])
148 return instance(baseui, wire["path"], create)
203 repo = instance(baseui, safe_bytes(wire["path"]), create)
204 log.debug('repository created: got HG object: %s', repo)
205 return repo
149 206
150 207 def repo(self, wire, create=False):
151 208 """
@@ -155,7 +212,7 b' class MercurialFactory(RepoFactory):'
155 212
156 213
157 214 def patch_ui_message_output(baseui):
158 baseui.setconfig('ui', 'quiet', 'false')
215 baseui.setconfig(b'ui', b'quiet', b'false')
159 216 output = io.BytesIO()
160 217
161 218 def write(data, **unused_kwargs):
@@ -169,6 +226,22 b' def patch_ui_message_output(baseui):'
169 226 return baseui, output
170 227
171 228
229 def get_obfuscated_url(url_obj):
230 url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd
231 url_obj.query = obfuscate_qs(url_obj.query)
232 obfuscated_uri = str(url_obj)
233 return obfuscated_uri
234
235
236 def normalize_url_for_hg(url: str):
237 _proto = None
238
239 if '+' in url[:url.find('://')]:
240 _proto = url[0:url.find('+')]
241 url = url[url.find('+') + 1:]
242 return url, _proto
243
244
172 245 class HgRemote(RemoteBase):
173 246
174 247 def __init__(self, factory):
@@ -187,6 +260,13 b' class HgRemote(RemoteBase):'
187 260 "hidden": self.ctx_hidden,
188 261 "_file_paths": self.ctx_list,
189 262 }
263 self._bulk_file_methods = {
264 "size": self.fctx_size,
265 "data": self.fctx_node_data,
266 "flags": self.fctx_flags,
267 "is_binary": self.is_binary,
268 "md5": self.md5_hash,
269 }
190 270
191 271 def _get_ctx(self, repo, ref):
192 272 return get_ctx(repo, ref)
@@ -194,7 +274,7 b' class HgRemote(RemoteBase):'
194 274 @reraise_safe_exceptions
195 275 def discover_hg_version(self):
196 276 from mercurial import util
197 return util.version()
277 return safe_str(util.version())
198 278
199 279 @reraise_safe_exceptions
200 280 def is_empty(self, wire):
@@ -210,10 +290,11 b' class HgRemote(RemoteBase):'
210 290 def bookmarks(self, wire):
211 291 cache_on, context_uid, repo_id = self._cache_on(wire)
212 292 region = self._region(wire)
293
213 294 @region.conditional_cache_on_arguments(condition=cache_on)
214 295 def _bookmarks(_context_uid, _repo_id):
215 296 repo = self._factory.repo(wire)
216 return dict(repo._bookmarks)
297 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo._bookmarks.items()}
217 298
218 299 return _bookmarks(context_uid, repo_id)
219 300
@@ -221,16 +302,17 b' class HgRemote(RemoteBase):'
221 302 def branches(self, wire, normal, closed):
222 303 cache_on, context_uid, repo_id = self._cache_on(wire)
223 304 region = self._region(wire)
305
224 306 @region.conditional_cache_on_arguments(condition=cache_on)
225 307 def _branches(_context_uid, _repo_id, _normal, _closed):
226 308 repo = self._factory.repo(wire)
227 309 iter_branches = repo.branchmap().iterbranches()
228 310 bt = {}
229 for branch_name, _heads, tip, is_closed in iter_branches:
311 for branch_name, _heads, tip_node, is_closed in iter_branches:
230 312 if normal and not is_closed:
231 bt[branch_name] = tip
313 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
232 314 if closed and is_closed:
233 bt[branch_name] = tip
315 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
234 316
235 317 return bt
236 318
@@ -240,16 +322,18 b' class HgRemote(RemoteBase):'
240 322 def bulk_request(self, wire, commit_id, pre_load):
241 323 cache_on, context_uid, repo_id = self._cache_on(wire)
242 324 region = self._region(wire)
325
243 326 @region.conditional_cache_on_arguments(condition=cache_on)
244 327 def _bulk_request(_repo_id, _commit_id, _pre_load):
245 328 result = {}
246 329 for attr in pre_load:
247 330 try:
248 331 method = self._bulk_methods[attr]
332 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
249 333 result[attr] = method(wire, commit_id)
250 334 except KeyError as e:
251 335 raise exceptions.VcsException(e)(
252 'Unknown bulk attribute: "%s"' % attr)
336 f'Unknown bulk attribute: "{attr}"')
253 337 return result
254 338
255 339 return _bulk_request(repo_id, commit_id, sorted(pre_load))
@@ -258,6 +342,7 b' class HgRemote(RemoteBase):'
258 342 def ctx_branch(self, wire, commit_id):
259 343 cache_on, context_uid, repo_id = self._cache_on(wire)
260 344 region = self._region(wire)
345
261 346 @region.conditional_cache_on_arguments(condition=cache_on)
262 347 def _ctx_branch(_repo_id, _commit_id):
263 348 repo = self._factory.repo(wire)
@@ -269,6 +354,7 b' class HgRemote(RemoteBase):'
269 354 def ctx_date(self, wire, commit_id):
270 355 cache_on, context_uid, repo_id = self._cache_on(wire)
271 356 region = self._region(wire)
357
272 358 @region.conditional_cache_on_arguments(condition=cache_on)
273 359 def _ctx_date(_repo_id, _commit_id):
274 360 repo = self._factory.repo(wire)
@@ -286,6 +372,7 b' class HgRemote(RemoteBase):'
286 372 def ctx_files(self, wire, commit_id):
287 373 cache_on, context_uid, repo_id = self._cache_on(wire)
288 374 region = self._region(wire)
375
289 376 @region.conditional_cache_on_arguments(condition=cache_on)
290 377 def _ctx_files(_repo_id, _commit_id):
291 378 repo = self._factory.repo(wire)
@@ -304,6 +391,7 b' class HgRemote(RemoteBase):'
304 391 def ctx_parents(self, wire, commit_id):
305 392 cache_on, context_uid, repo_id = self._cache_on(wire)
306 393 region = self._region(wire)
394
307 395 @region.conditional_cache_on_arguments(condition=cache_on)
308 396 def _ctx_parents(_repo_id, _commit_id):
309 397 repo = self._factory.repo(wire)
@@ -317,6 +405,7 b' class HgRemote(RemoteBase):'
317 405 def ctx_children(self, wire, commit_id):
318 406 cache_on, context_uid, repo_id = self._cache_on(wire)
319 407 region = self._region(wire)
408
320 409 @region.conditional_cache_on_arguments(condition=cache_on)
321 410 def _ctx_children(_repo_id, _commit_id):
322 411 repo = self._factory.repo(wire)
@@ -330,6 +419,7 b' class HgRemote(RemoteBase):'
330 419 def ctx_phase(self, wire, commit_id):
331 420 cache_on, context_uid, repo_id = self._cache_on(wire)
332 421 region = self._region(wire)
422
333 423 @region.conditional_cache_on_arguments(condition=cache_on)
334 424 def _ctx_phase(_context_uid, _repo_id, _commit_id):
335 425 repo = self._factory.repo(wire)
@@ -342,6 +432,7 b' class HgRemote(RemoteBase):'
342 432 def ctx_obsolete(self, wire, commit_id):
343 433 cache_on, context_uid, repo_id = self._cache_on(wire)
344 434 region = self._region(wire)
435
345 436 @region.conditional_cache_on_arguments(condition=cache_on)
346 437 def _ctx_obsolete(_context_uid, _repo_id, _commit_id):
347 438 repo = self._factory.repo(wire)
@@ -353,6 +444,7 b' class HgRemote(RemoteBase):'
353 444 def ctx_hidden(self, wire, commit_id):
354 445 cache_on, context_uid, repo_id = self._cache_on(wire)
355 446 region = self._region(wire)
447
356 448 @region.conditional_cache_on_arguments(condition=cache_on)
357 449 def _ctx_hidden(_context_uid, _repo_id, _commit_id):
358 450 repo = self._factory.repo(wire)
@@ -384,46 +476,42 b' class HgRemote(RemoteBase):'
384 476
385 477 @reraise_safe_exceptions
386 478 def check_url(self, url, config):
387 _proto = None
388 if '+' in url[:url.find('://')]:
389 _proto = url[0:url.find('+')]
390 url = url[url.find('+') + 1:]
479 url, _proto = normalize_url_for_hg(url)
480 url_obj = url_parser(safe_bytes(url))
481
482 test_uri = safe_str(url_obj.authinfo()[0])
483 authinfo = url_obj.authinfo()[1]
484 obfuscated_uri = get_obfuscated_url(url_obj)
485 log.info("Checking URL for remote cloning/import: %s", obfuscated_uri)
486
391 487 handlers = []
392 url_obj = url_parser(url)
393 test_uri, authinfo = url_obj.authinfo()
394 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
395 url_obj.query = obfuscate_qs(url_obj.query)
396
397 cleaned_uri = str(url_obj)
398 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
399
400 488 if authinfo:
401 489 # create a password manager
402 passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
403 passmgr.add_password(*authinfo)
490 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
491 passmgr.add_password(*convert_to_str(authinfo))
404 492
405 493 handlers.extend((httpbasicauthhandler(passmgr),
406 494 httpdigestauthhandler(passmgr)))
407 495
408 o = urllib2.build_opener(*handlers)
496 o = urllib.request.build_opener(*handlers)
409 497 o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
410 498 ('Accept', 'application/mercurial-0.1')]
411 499
412 500 q = {"cmd": 'between'}
413 q.update({'pairs': "%s-%s" % ('0' * 40, '0' * 40)})
414 qs = '?%s' % urllib.urlencode(q)
415 cu = "%s%s" % (test_uri, qs)
416 req = urllib2.Request(cu, None, {})
501 q.update({'pairs': "{}-{}".format('0' * 40, '0' * 40)})
502 qs = f'?{urllib.parse.urlencode(q)}'
503 cu = f"{test_uri}{qs}"
417 504
418 505 try:
419 log.debug("Trying to open URL %s", cleaned_uri)
506 req = urllib.request.Request(cu, None, {})
507 log.debug("Trying to open URL %s", obfuscated_uri)
420 508 resp = o.open(req)
421 509 if resp.code != 200:
422 510 raise exceptions.URLError()('Return Code is not 200')
423 511 except Exception as e:
424 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
512 log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True)
425 513 # means it cannot be cloned
426 raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
514 raise exceptions.URLError(e)(f"[{obfuscated_uri}] org_exc: {e}")
427 515
428 516 # now check if it's a proper hg repo, but don't do it for svn
429 517 try:
@@ -432,19 +520,17 b' class HgRemote(RemoteBase):'
432 520 else:
433 521 # check for pure hg repos
434 522 log.debug(
435 "Verifying if URL is a Mercurial repository: %s",
436 cleaned_uri)
523 "Verifying if URL is a Mercurial repository: %s", obfuscated_uri)
437 524 ui = make_ui_from_config(config)
438 peer_checker = makepeer(ui, url)
439 peer_checker.lookup('tip')
525 peer_checker = makepeer(ui, safe_bytes(url))
526 peer_checker.lookup(b'tip')
440 527 except Exception as e:
441 528 log.warning("URL is not a valid Mercurial repository: %s",
442 cleaned_uri)
529 obfuscated_uri)
443 530 raise exceptions.URLError(e)(
444 "url [%s] does not look like an hg repo org_exc: %s"
445 % (cleaned_uri, e))
531 f"url [{obfuscated_uri}] does not look like an hg repo org_exc: {e}")
446 532
447 log.info("URL is a valid Mercurial repository: %s", cleaned_uri)
533 log.info("URL is a valid Mercurial repository: %s", obfuscated_uri)
448 534 return True
449 535
450 536 @reraise_safe_exceptions
@@ -452,14 +538,17 b' class HgRemote(RemoteBase):'
452 538 repo = self._factory.repo(wire)
453 539
454 540 if file_filter:
455 match_filter = match(file_filter[0], '', [file_filter[1]])
541 # unpack the file-filter
542 repo_path, node_path = file_filter
543 match_filter = match(safe_bytes(repo_path), b'', [safe_bytes(node_path)])
456 544 else:
457 545 match_filter = file_filter
458 546 opts = diffopts(git=opt_git, ignorews=opt_ignorews, context=context, showfunc=1)
459 547
460 548 try:
461 return "".join(patch.diff(
462 repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts))
549 diff_iter = patch.diff(
550 repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
551 return BytesEnvelope(b"".join(diff_iter))
463 552 except RepoLookupError as e:
464 553 raise exceptions.LookupException(e)()
465 554
@@ -467,23 +556,27 b' class HgRemote(RemoteBase):'
467 556 def node_history(self, wire, revision, path, limit):
468 557 cache_on, context_uid, repo_id = self._cache_on(wire)
469 558 region = self._region(wire)
559
470 560 @region.conditional_cache_on_arguments(condition=cache_on)
471 561 def _node_history(_context_uid, _repo_id, _revision, _path, _limit):
472 562 repo = self._factory.repo(wire)
473 563
474 564 ctx = self._get_ctx(repo, revision)
475 fctx = ctx.filectx(path)
565 fctx = ctx.filectx(safe_bytes(path))
476 566
477 567 def history_iter():
478 568 limit_rev = fctx.rev()
479 for obj in reversed(list(fctx.filelog())):
480 obj = fctx.filectx(obj)
481 ctx = obj.changectx()
482 if ctx.hidden() or ctx.obsolete():
569
570 for fctx_candidate in reversed(list(fctx.filelog())):
571 f_obj = fctx.filectx(fctx_candidate)
572
573 # NOTE: This can be problematic...we can hide ONLY history node resulting in empty history
574 _ctx = f_obj.changectx()
575 if _ctx.hidden() or _ctx.obsolete():
483 576 continue
484 577
485 if limit_rev >= obj.rev():
486 yield obj
578 if limit_rev >= f_obj.rev():
579 yield f_obj
487 580
488 581 history = []
489 582 for cnt, obj in enumerate(history_iter()):
@@ -495,14 +588,15 b' class HgRemote(RemoteBase):'
495 588 return _node_history(context_uid, repo_id, revision, path, limit)
496 589
497 590 @reraise_safe_exceptions
498 def node_history_untill(self, wire, revision, path, limit):
591 def node_history_until(self, wire, revision, path, limit):
499 592 cache_on, context_uid, repo_id = self._cache_on(wire)
500 593 region = self._region(wire)
594
501 595 @region.conditional_cache_on_arguments(condition=cache_on)
502 596 def _node_history_until(_context_uid, _repo_id):
503 597 repo = self._factory.repo(wire)
504 598 ctx = self._get_ctx(repo, revision)
505 fctx = ctx.filectx(path)
599 fctx = ctx.filectx(safe_bytes(path))
506 600
507 601 file_log = list(fctx.filelog())
508 602 if limit:
@@ -513,35 +607,55 b' class HgRemote(RemoteBase):'
513 607 return _node_history_until(context_uid, repo_id, revision, path, limit)
514 608
515 609 @reraise_safe_exceptions
610 def bulk_file_request(self, wire, commit_id, path, pre_load):
611 cache_on, context_uid, repo_id = self._cache_on(wire)
612 region = self._region(wire)
613
614 @region.conditional_cache_on_arguments(condition=cache_on)
615 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
616 result = {}
617 for attr in pre_load:
618 try:
619 method = self._bulk_file_methods[attr]
620 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
621 result[attr] = method(wire, _commit_id, _path)
622 except KeyError as e:
623 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
624 return result
625
626 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
627
628 @reraise_safe_exceptions
516 629 def fctx_annotate(self, wire, revision, path):
517 630 repo = self._factory.repo(wire)
518 631 ctx = self._get_ctx(repo, revision)
519 fctx = ctx.filectx(path)
632 fctx = ctx.filectx(safe_bytes(path))
520 633
521 634 result = []
522 635 for i, annotate_obj in enumerate(fctx.annotate(), 1):
523 636 ln_no = i
524 637 sha = hex(annotate_obj.fctx.node())
525 638 content = annotate_obj.text
526 result.append((ln_no, sha, content))
527 return result
639 result.append((ln_no, ascii_str(sha), content))
640 return BinaryEnvelope(result)
528 641
529 642 @reraise_safe_exceptions
530 643 def fctx_node_data(self, wire, revision, path):
531 644 repo = self._factory.repo(wire)
532 645 ctx = self._get_ctx(repo, revision)
533 fctx = ctx.filectx(path)
534 return fctx.data()
646 fctx = ctx.filectx(safe_bytes(path))
647 return BytesEnvelope(fctx.data())
535 648
536 649 @reraise_safe_exceptions
537 650 def fctx_flags(self, wire, commit_id, path):
538 651 cache_on, context_uid, repo_id = self._cache_on(wire)
539 652 region = self._region(wire)
653
540 654 @region.conditional_cache_on_arguments(condition=cache_on)
541 655 def _fctx_flags(_repo_id, _commit_id, _path):
542 656 repo = self._factory.repo(wire)
543 657 ctx = self._get_ctx(repo, commit_id)
544 fctx = ctx.filectx(path)
658 fctx = ctx.filectx(safe_bytes(path))
545 659 return fctx.flags()
546 660
547 661 return _fctx_flags(repo_id, commit_id, path)
@@ -550,11 +664,12 b' class HgRemote(RemoteBase):'
550 664 def fctx_size(self, wire, commit_id, path):
551 665 cache_on, context_uid, repo_id = self._cache_on(wire)
552 666 region = self._region(wire)
667
553 668 @region.conditional_cache_on_arguments(condition=cache_on)
554 669 def _fctx_size(_repo_id, _revision, _path):
555 670 repo = self._factory.repo(wire)
556 671 ctx = self._get_ctx(repo, commit_id)
557 fctx = ctx.filectx(path)
672 fctx = ctx.filectx(safe_bytes(path))
558 673 return fctx.size()
559 674 return _fctx_size(repo_id, commit_id, path)
560 675
@@ -562,44 +677,59 b' class HgRemote(RemoteBase):'
562 677 def get_all_commit_ids(self, wire, name):
563 678 cache_on, context_uid, repo_id = self._cache_on(wire)
564 679 region = self._region(wire)
680
565 681 @region.conditional_cache_on_arguments(condition=cache_on)
566 682 def _get_all_commit_ids(_context_uid, _repo_id, _name):
567 683 repo = self._factory.repo(wire)
568 repo = repo.filtered(name)
569 revs = map(lambda x: hex(x[7]), repo.changelog.index)
684 revs = [ascii_str(repo[x].hex()) for x in repo.filtered(b'visible').changelog.revs()]
570 685 return revs
571 686 return _get_all_commit_ids(context_uid, repo_id, name)
572 687
573 688 @reraise_safe_exceptions
574 689 def get_config_value(self, wire, section, name, untrusted=False):
575 690 repo = self._factory.repo(wire)
576 return repo.ui.config(section, name, untrusted=untrusted)
691 return repo.ui.config(ascii_bytes(section), ascii_bytes(name), untrusted=untrusted)
577 692
578 693 @reraise_safe_exceptions
579 694 def is_large_file(self, wire, commit_id, path):
580 695 cache_on, context_uid, repo_id = self._cache_on(wire)
581 696 region = self._region(wire)
697
582 698 @region.conditional_cache_on_arguments(condition=cache_on)
583 699 def _is_large_file(_context_uid, _repo_id, _commit_id, _path):
584 return largefiles.lfutil.isstandin(path)
700 return largefiles.lfutil.isstandin(safe_bytes(path))
585 701
586 702 return _is_large_file(context_uid, repo_id, commit_id, path)
587 703
588 704 @reraise_safe_exceptions
589 705 def is_binary(self, wire, revision, path):
590 706 cache_on, context_uid, repo_id = self._cache_on(wire)
707 region = self._region(wire)
591 708
592 region = self._region(wire)
593 709 @region.conditional_cache_on_arguments(condition=cache_on)
594 710 def _is_binary(_repo_id, _sha, _path):
595 711 repo = self._factory.repo(wire)
596 712 ctx = self._get_ctx(repo, revision)
597 fctx = ctx.filectx(path)
713 fctx = ctx.filectx(safe_bytes(path))
598 714 return fctx.isbinary()
599 715
600 716 return _is_binary(repo_id, revision, path)
601 717
602 718 @reraise_safe_exceptions
719 def md5_hash(self, wire, revision, path):
720 cache_on, context_uid, repo_id = self._cache_on(wire)
721 region = self._region(wire)
722
723 @region.conditional_cache_on_arguments(condition=cache_on)
724 def _md5_hash(_repo_id, _sha, _path):
725 repo = self._factory.repo(wire)
726 ctx = self._get_ctx(repo, revision)
727 fctx = ctx.filectx(safe_bytes(path))
728 return hashlib.md5(fctx.data()).hexdigest()
729
730 return _md5_hash(repo_id, revision, path)
731
732 @reraise_safe_exceptions
603 733 def in_largefiles_store(self, wire, sha):
604 734 repo = self._factory.repo(wire)
605 735 return largefiles.lfutil.instore(repo, sha)
@@ -627,11 +757,10 b' class HgRemote(RemoteBase):'
627 757 @reraise_safe_exceptions
628 758 def lookup(self, wire, revision, both):
629 759 cache_on, context_uid, repo_id = self._cache_on(wire)
760 region = self._region(wire)
630 761
631 region = self._region(wire)
632 762 @region.conditional_cache_on_arguments(condition=cache_on)
633 763 def _lookup(_context_uid, _repo_id, _revision, _both):
634
635 764 repo = self._factory.repo(wire)
636 765 rev = _revision
637 766 if isinstance(rev, int):
@@ -644,11 +773,15 b' class HgRemote(RemoteBase):'
644 773 rev = rev + -1
645 774 try:
646 775 ctx = self._get_ctx(repo, rev)
647 except (TypeError, RepoLookupError) as e:
648 e._org_exc_tb = traceback.format_exc()
776 except AmbiguousPrefixLookupError:
777 e = RepoLookupError(rev)
778 e._org_exc_tb = format_exc(sys.exc_info())
779 raise exceptions.LookupException(e)(rev)
780 except (TypeError, RepoLookupError, binascii.Error) as e:
781 e._org_exc_tb = format_exc(sys.exc_info())
649 782 raise exceptions.LookupException(e)(rev)
650 783 except LookupError as e:
651 e._org_exc_tb = traceback.format_exc()
784 e._org_exc_tb = format_exc(sys.exc_info())
652 785 raise exceptions.LookupException(e)(e.name)
653 786
654 787 if not both:
@@ -667,12 +800,12 b' class HgRemote(RemoteBase):'
667 800 repo = self._factory.repo(wire)
668 801
669 802 # Disable any prompts for this repo
670 repo.ui.setconfig('ui', 'interactive', 'off', '-y')
803 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
671 804
672 bookmarks = dict(repo._bookmarks).keys()
673 remote = peer(repo, {}, url)
805 bookmarks = list(dict(repo._bookmarks).keys())
806 remote = peer(repo, {}, safe_bytes(url))
674 807 # Disable any prompts for this remote
675 remote.ui.setconfig('ui', 'interactive', 'off', '-y')
808 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
676 809
677 810 return exchange.push(
678 811 repo, remote, newbranch=True, bookmarks=bookmarks).cgresult
@@ -686,12 +819,15 b' class HgRemote(RemoteBase):'
686 819 @reraise_safe_exceptions
687 820 def rev_range(self, wire, commit_filter):
688 821 cache_on, context_uid, repo_id = self._cache_on(wire)
822 region = self._region(wire)
689 823
690 region = self._region(wire)
691 824 @region.conditional_cache_on_arguments(condition=cache_on)
692 825 def _rev_range(_context_uid, _repo_id, _filter):
693 826 repo = self._factory.repo(wire)
694 revisions = [rev for rev in revrange(repo, commit_filter)]
827 revisions = [
828 ascii_str(repo[rev].hex())
829 for rev in revrange(repo, list(map(ascii_bytes, commit_filter)))
830 ]
695 831 return revisions
696 832
697 833 return _rev_range(context_uid, repo_id, sorted(commit_filter))
@@ -710,17 +846,18 b' class HgRemote(RemoteBase):'
710 846 return len(repo) - 1, 0
711 847
712 848 stop, start = get_revs(repo, [node + ':'])
713 revs = [hex(repo[r].node()) for r in xrange(start, stop + 1)]
849 revs = [ascii_str(repo[r].hex()) for r in range(start, stop + 1)]
714 850 return revs
715 851
716 852 @reraise_safe_exceptions
717 853 def revs_from_revspec(self, wire, rev_spec, *args, **kwargs):
718 other_path = kwargs.pop('other_path', None)
854 org_path = safe_bytes(wire["path"])
855 other_path = safe_bytes(kwargs.pop('other_path', ''))
719 856
720 857 # case when we want to compare two independent repositories
721 858 if other_path and other_path != wire["path"]:
722 859 baseui = self._factory._create_config(wire["config"])
723 repo = unionrepo.makeunionrepository(baseui, other_path, wire["path"])
860 repo = unionrepo.makeunionrepository(baseui, other_path, org_path)
724 861 else:
725 862 repo = self._factory.repo(wire)
726 863 return list(repo.revs(rev_spec, *args))
@@ -764,17 +901,20 b' class HgRemote(RemoteBase):'
764 901 def tags(self, wire):
765 902 cache_on, context_uid, repo_id = self._cache_on(wire)
766 903 region = self._region(wire)
904
767 905 @region.conditional_cache_on_arguments(condition=cache_on)
768 906 def _tags(_context_uid, _repo_id):
769 907 repo = self._factory.repo(wire)
770 return repo.tags()
908 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo.tags().items()}
771 909
772 910 return _tags(context_uid, repo_id)
773 911
774 912 @reraise_safe_exceptions
775 def update(self, wire, node=None, clean=False):
913 def update(self, wire, node='', clean=False):
776 914 repo = self._factory.repo(wire)
777 915 baseui = self._factory._create_config(wire['config'])
916 node = safe_bytes(node)
917
778 918 commands.update(baseui, repo, node=node, clean=clean)
779 919
780 920 @reraise_safe_exceptions
@@ -800,10 +940,10 b' class HgRemote(RemoteBase):'
800 940
801 941 baseui.write = write
802 942 if branch:
803 args = [branch]
943 args = [safe_bytes(branch)]
804 944 else:
805 945 args = []
806 commands.heads(baseui, repo, template='{node} ', *args)
946 commands.heads(baseui, repo, template=b'{node} ', *args)
807 947
808 948 return output.getvalue()
809 949
@@ -812,63 +952,61 b' class HgRemote(RemoteBase):'
812 952 repo = self._factory.repo(wire)
813 953 changelog = repo.changelog
814 954 lookup = repo.lookup
815 a = changelog.ancestor(lookup(revision1), lookup(revision2))
955 a = changelog.ancestor(lookup(safe_bytes(revision1)), lookup(safe_bytes(revision2)))
816 956 return hex(a)
817 957
818 958 @reraise_safe_exceptions
819 959 def clone(self, wire, source, dest, update_after_clone=False, hooks=True):
820 960 baseui = self._factory._create_config(wire["config"], hooks=hooks)
821 clone(baseui, source, dest, noupdate=not update_after_clone)
961 clone(baseui, safe_bytes(source), safe_bytes(dest), noupdate=not update_after_clone)
822 962
823 963 @reraise_safe_exceptions
824 964 def commitctx(self, wire, message, parents, commit_time, commit_timezone, user, files, extra, removed, updated):
825 965
826 966 repo = self._factory.repo(wire)
827 967 baseui = self._factory._create_config(wire['config'])
828 publishing = baseui.configbool('phases', 'publish')
829 if publishing:
830 new_commit = 'public'
831 else:
832 new_commit = 'draft'
968 publishing = baseui.configbool(b'phases', b'publish')
833 969
834 def _filectxfn(_repo, ctx, path):
970 def _filectxfn(_repo, ctx, path: bytes):
835 971 """
836 972 Marks given path as added/changed/removed in a given _repo. This is
837 973 for internal mercurial commit function.
838 974 """
839 975
840 976 # check if this path is removed
841 if path in removed:
977 if safe_str(path) in removed:
842 978 # returning None is a way to mark node for removal
843 979 return None
844 980
845 981 # check if this path is added
846 982 for node in updated:
847 if node['path'] == path:
983 if safe_bytes(node['path']) == path:
848 984 return memfilectx(
849 985 _repo,
850 986 changectx=ctx,
851 path=node['path'],
852 data=node['content'],
987 path=safe_bytes(node['path']),
988 data=safe_bytes(node['content']),
853 989 islink=False,
854 990 isexec=bool(node['mode'] & stat.S_IXUSR),
855 991 copysource=False)
992 abort_exc = exceptions.AbortException()
993 raise abort_exc(f"Given path haven't been marked as added, changed or removed ({path})")
856 994
857 raise exceptions.AbortException()(
858 "Given path haven't been marked as added, "
859 "changed or removed (%s)" % path)
860
861 with repo.ui.configoverride({('phases', 'new-commit'): new_commit}):
862
995 if publishing:
996 new_commit_phase = b'public'
997 else:
998 new_commit_phase = b'draft'
999 with repo.ui.configoverride({(b'phases', b'new-commit'): new_commit_phase}):
1000 kwargs = {safe_bytes(k): safe_bytes(v) for k, v in extra.items()}
863 1001 commit_ctx = memctx(
864 1002 repo=repo,
865 1003 parents=parents,
866 text=message,
867 files=files,
1004 text=safe_bytes(message),
1005 files=[safe_bytes(x) for x in files],
868 1006 filectxfn=_filectxfn,
869 user=user,
1007 user=safe_bytes(user),
870 1008 date=(commit_time, commit_timezone),
871 extra=extra)
1009 extra=kwargs)
872 1010
873 1011 n = repo.commitctx(commit_ctx)
874 1012 new_id = hex(n)
@@ -879,11 +1017,11 b' class HgRemote(RemoteBase):'
879 1017 def pull(self, wire, url, commit_ids=None):
880 1018 repo = self._factory.repo(wire)
881 1019 # Disable any prompts for this repo
882 repo.ui.setconfig('ui', 'interactive', 'off', '-y')
1020 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
883 1021
884 remote = peer(repo, {}, url)
1022 remote = peer(repo, {}, safe_bytes(url))
885 1023 # Disable any prompts for this remote
886 remote.ui.setconfig('ui', 'interactive', 'off', '-y')
1024 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
887 1025
888 1026 if commit_ids:
889 1027 commit_ids = [bin(commit_id) for commit_id in commit_ids]
@@ -892,34 +1030,47 b' class HgRemote(RemoteBase):'
892 1030 repo, remote, heads=commit_ids, force=None).cgresult
893 1031
894 1032 @reraise_safe_exceptions
895 def pull_cmd(self, wire, source, bookmark=None, branch=None, revision=None, hooks=True):
1033 def pull_cmd(self, wire, source, bookmark='', branch='', revision='', hooks=True):
896 1034 repo = self._factory.repo(wire)
897 1035 baseui = self._factory._create_config(wire['config'], hooks=hooks)
898 1036
1037 source = safe_bytes(source)
1038
899 1039 # Mercurial internally has a lot of logic that checks ONLY if
900 1040 # option is defined, we just pass those if they are defined then
901 1041 opts = {}
1042
902 1043 if bookmark:
903 opts['bookmark'] = bookmark
1044 opts['bookmark'] = [safe_bytes(x) for x in bookmark] \
1045 if isinstance(bookmark, list) else safe_bytes(bookmark)
1046
904 1047 if branch:
905 opts['branch'] = branch
1048 opts['branch'] = [safe_bytes(x) for x in branch] \
1049 if isinstance(branch, list) else safe_bytes(branch)
1050
906 1051 if revision:
907 opts['rev'] = revision
1052 opts['rev'] = [safe_bytes(x) for x in revision] \
1053 if isinstance(revision, list) else safe_bytes(revision)
908 1054
909 1055 commands.pull(baseui, repo, source, **opts)
910 1056
911 1057 @reraise_safe_exceptions
912 def push(self, wire, revisions, dest_path, hooks=True, push_branches=False):
1058 def push(self, wire, revisions, dest_path, hooks: bool = True, push_branches: bool = False):
913 1059 repo = self._factory.repo(wire)
914 1060 baseui = self._factory._create_config(wire['config'], hooks=hooks)
915 commands.push(baseui, repo, dest=dest_path, rev=revisions,
1061
1062 revisions = [safe_bytes(x) for x in revisions] \
1063 if isinstance(revisions, list) else safe_bytes(revisions)
1064
1065 commands.push(baseui, repo, safe_bytes(dest_path),
1066 rev=revisions,
916 1067 new_branch=push_branches)
917 1068
918 1069 @reraise_safe_exceptions
919 1070 def strip(self, wire, revision, update, backup):
920 1071 repo = self._factory.repo(wire)
921 1072 ctx = self._get_ctx(repo, revision)
922 hgext_strip(
1073 hgext_strip.strip(
923 1074 repo.baseui, repo, ctx.node(), update=update, backup=backup)
924 1075
925 1076 @reraise_safe_exceptions
@@ -943,25 +1094,25 b' class HgRemote(RemoteBase):'
943 1094 def merge(self, wire, revision):
944 1095 repo = self._factory.repo(wire)
945 1096 baseui = self._factory._create_config(wire['config'])
946 repo.ui.setconfig('ui', 'merge', 'internal:dump')
1097 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
947 1098
948 1099 # In case of sub repositories are used mercurial prompts the user in
949 1100 # case of merge conflicts or different sub repository sources. By
950 1101 # setting the interactive flag to `False` mercurial doesn't prompt the
951 1102 # used but instead uses a default value.
952 repo.ui.setconfig('ui', 'interactive', False)
953 commands.merge(baseui, repo, rev=revision)
1103 repo.ui.setconfig(b'ui', b'interactive', False)
1104 commands.merge(baseui, repo, rev=safe_bytes(revision))
954 1105
955 1106 @reraise_safe_exceptions
956 1107 def merge_state(self, wire):
957 1108 repo = self._factory.repo(wire)
958 repo.ui.setconfig('ui', 'merge', 'internal:dump')
1109 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
959 1110
960 1111 # In case of sub repositories are used mercurial prompts the user in
961 1112 # case of merge conflicts or different sub repository sources. By
962 1113 # setting the interactive flag to `False` mercurial doesn't prompt the
963 1114 # used but instead uses a default value.
964 repo.ui.setconfig('ui', 'interactive', False)
1115 repo.ui.setconfig(b'ui', b'interactive', False)
965 1116 ms = hg_merge.mergestate(repo)
966 1117 return [x for x in ms.unresolved()]
967 1118
@@ -969,20 +1120,34 b' class HgRemote(RemoteBase):'
969 1120 def commit(self, wire, message, username, close_branch=False):
970 1121 repo = self._factory.repo(wire)
971 1122 baseui = self._factory._create_config(wire['config'])
972 repo.ui.setconfig('ui', 'username', username)
973 commands.commit(baseui, repo, message=message, close_branch=close_branch)
1123 repo.ui.setconfig(b'ui', b'username', safe_bytes(username))
1124 commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch)
974 1125
975 1126 @reraise_safe_exceptions
976 def rebase(self, wire, source=None, dest=None, abort=False):
1127 def rebase(self, wire, source='', dest='', abort=False):
1128
977 1129 repo = self._factory.repo(wire)
978 1130 baseui = self._factory._create_config(wire['config'])
979 repo.ui.setconfig('ui', 'merge', 'internal:dump')
1131 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
980 1132 # In case of sub repositories are used mercurial prompts the user in
981 1133 # case of merge conflicts or different sub repository sources. By
982 1134 # setting the interactive flag to `False` mercurial doesn't prompt the
983 1135 # used but instead uses a default value.
984 repo.ui.setconfig('ui', 'interactive', False)
985 rebase.rebase(baseui, repo, base=source, dest=dest, abort=abort, keep=not abort)
1136 repo.ui.setconfig(b'ui', b'interactive', False)
1137
1138 rebase_kws = dict(
1139 keep=not abort,
1140 abort=abort
1141 )
1142
1143 if source:
1144 source = repo[source]
1145 rebase_kws['base'] = [source.hex()]
1146 if dest:
1147 dest = repo[dest]
1148 rebase_kws['dest'] = dest.hex()
1149
1150 rebase.rebase(baseui, repo, **rebase_kws)
986 1151
987 1152 @reraise_safe_exceptions
988 1153 def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
@@ -992,17 +1157,18 b' class HgRemote(RemoteBase):'
992 1157
993 1158 date = (tag_time, tag_timezone)
994 1159 try:
995 hg_tag.tag(repo, name, node, message, local, user, date)
1160 hg_tag.tag(repo, safe_bytes(name), node, safe_bytes(message), local, safe_bytes(user), date)
996 1161 except Abort as e:
997 1162 log.exception("Tag operation aborted")
998 1163 # Exception can contain unicode which we convert
999 1164 raise exceptions.AbortException(e)(repr(e))
1000 1165
1001 1166 @reraise_safe_exceptions
1002 def bookmark(self, wire, bookmark, revision=None):
1167 def bookmark(self, wire, bookmark, revision=''):
1003 1168 repo = self._factory.repo(wire)
1004 1169 baseui = self._factory._create_config(wire['config'])
1005 commands.bookmark(baseui, repo, bookmark, rev=revision, force=True)
1170 revision = revision or ''
1171 commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
1006 1172
1007 1173 @reraise_safe_exceptions
1008 1174 def install_hooks(self, wire, force=False):
@@ -1012,8 +1178,8 b' class HgRemote(RemoteBase):'
1012 1178 @reraise_safe_exceptions
1013 1179 def get_hooks_info(self, wire):
1014 1180 return {
1015 'pre_version': vcsserver.__version__,
1016 'post_version': vcsserver.__version__,
1181 'pre_version': vcsserver.get_version(),
1182 'post_version': vcsserver.get_version(),
1017 1183 }
1018 1184
1019 1185 @reraise_safe_exceptions
@@ -1021,8 +1187,8 b' class HgRemote(RemoteBase):'
1021 1187 pass
1022 1188
1023 1189 @reraise_safe_exceptions
1024 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1025 archive_dir_name, commit_id):
1190 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
1191 archive_dir_name, commit_id, cache_config):
1026 1192
1027 1193 def file_walker(_commit_id, path):
1028 1194 repo = self._factory.repo(wire)
@@ -1031,7 +1197,7 b' class HgRemote(RemoteBase):'
1031 1197 if is_root:
1032 1198 matcher = alwaysmatcher(badfn=None)
1033 1199 else:
1034 matcher = patternmatcher('', [(b'glob', path+'/**', b'')], badfn=None)
1200 matcher = patternmatcher('', [(b'glob', safe_bytes(path)+b'/**', b'')], badfn=None)
1035 1201 file_iter = ctx.manifest().walk(matcher)
1036 1202
1037 1203 for fn in file_iter:
@@ -1042,6 +1208,6 b' class HgRemote(RemoteBase):'
1042 1208
1043 1209 yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
1044 1210
1045 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1046 archive_dir_name, commit_id)
1211 return store_archive_in_cache(
1212 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
1047 1213
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -15,31 +15,42 b''
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 from __future__ import absolute_import
19 18
20 19 import os
21 20 import subprocess
22 import time
23 from urllib2 import URLError
24 import urlparse
21 from urllib.error import URLError
22 import urllib.parse
25 23 import logging
26 24 import posixpath as vcspath
27 import StringIO
28 import urllib
25 import io
26 import urllib.request
27 import urllib.parse
28 import urllib.error
29 29 import traceback
30 30
31 import svn.client
32 import svn.core
33 import svn.delta
34 import svn.diff
35 import svn.fs
36 import svn.repos
31
32 import svn.client # noqa
33 import svn.core # noqa
34 import svn.delta # noqa
35 import svn.diff # noqa
36 import svn.fs # noqa
37 import svn.repos # noqa
37 38
39 import rhodecode
38 40 from vcsserver import svn_diff, exceptions, subprocessio, settings
39 from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, archive_repo
41 from vcsserver.base import (
42 RepoFactory,
43 raise_from_original,
44 ArchiveNode,
45 store_archive_in_cache,
46 BytesEnvelope,
47 BinaryEnvelope,
48 )
40 49 from vcsserver.exceptions import NoContentException
41 from vcsserver.utils import safe_str
50 from vcsserver.str_utils import safe_str, safe_bytes
51 from vcsserver.type_utils import assert_bytes
42 52 from vcsserver.vcs_base import RemoteBase
53 from vcsserver.lib.svnremoterepo import svnremoterepo
43 54
44 55 log = logging.getLogger(__name__)
45 56
@@ -52,7 +63,7 b' svn_compatible_versions_map = {'
52 63 'pre-1.9-compatible': '1.8',
53 64 }
54 65
55 current_compatible_version = '1.12'
66 current_compatible_version = '1.14'
56 67
57 68
58 69 def reraise_safe_exceptions(func):
@@ -63,7 +74,7 b' def reraise_safe_exceptions(func):'
63 74 except Exception as e:
64 75 if not hasattr(e, '_vcs_kind'):
65 76 log.exception("Unhandled exception in svn remote call")
66 raise_from_original(exceptions.UnhandledException(e))
77 raise_from_original(exceptions.UnhandledException(e), e)
67 78 raise
68 79 return wrapper
69 80
@@ -82,12 +93,12 b' class SubversionFactory(RepoFactory):'
82 93 or compatible_version
83 94 fs_config['compatible-version'] = compatible_version_string
84 95
85 log.debug('Create SVN repo with config "%s"', fs_config)
96 log.debug('Create SVN repo with config `%s`', fs_config)
86 97 repo = svn.repos.create(path, "", "", None, fs_config)
87 98 else:
88 99 repo = svn.repos.open(path)
89 100
90 log.debug('Got SVN object: %s', repo)
101 log.debug('repository created: got SVN object: %s', repo)
91 102 return repo
92 103
93 104 def repo(self, wire, create=False, compatible_version=None):
@@ -107,9 +118,39 b' class SvnRemote(RemoteBase):'
107 118
108 119 def __init__(self, factory, hg_factory=None):
109 120 self._factory = factory
110 # TODO: Remove once we do not use internal Mercurial objects anymore
111 # for subversion
112 self._hg_factory = hg_factory
121
122 self._bulk_methods = {
123 # NOT supported in SVN ATM...
124 }
125 self._bulk_file_methods = {
126 "size": self.get_file_size,
127 "data": self.get_file_content,
128 "flags": self.get_node_type,
129 "is_binary": self.is_binary,
130 "md5": self.md5_hash
131 }
132
133 @reraise_safe_exceptions
134 def bulk_file_request(self, wire, commit_id, path, pre_load):
135 cache_on, context_uid, repo_id = self._cache_on(wire)
136 region = self._region(wire)
137
138 # since we use unified API, we need to cast from str to in for SVN
139 commit_id = int(commit_id)
140
141 @region.conditional_cache_on_arguments(condition=cache_on)
142 def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load):
143 result = {}
144 for attr in pre_load:
145 try:
146 method = self._bulk_file_methods[attr]
147 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
148 result[attr] = method(wire, _commit_id, _path)
149 except KeyError as e:
150 raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"')
151 return result
152
153 return BinaryEnvelope(_bulk_file_request(repo_id, commit_id, path, sorted(pre_load)))
113 154
114 155 @reraise_safe_exceptions
115 156 def discover_svn_version(self):
@@ -118,61 +159,64 b' class SvnRemote(RemoteBase):'
118 159 svn_ver = svn.core.SVN_VERSION
119 160 except ImportError:
120 161 svn_ver = None
121 return svn_ver
162 return safe_str(svn_ver)
122 163
123 164 @reraise_safe_exceptions
124 165 def is_empty(self, wire):
125
126 166 try:
127 167 return self.lookup(wire, -1) == 0
128 168 except Exception:
129 169 log.exception("failed to read object_store")
130 170 return False
131 171
132 def check_url(self, url, config_items):
133 # this can throw exception if not installed, but we detect this
134 from hgsubversion import svnrepo
172 def check_url(self, url, config):
135 173
136 baseui = self._hg_factory._create_config(config_items)
137 # uuid function get's only valid UUID from proper repo, else
174 # uuid function gets only valid UUID from proper repo, else
138 175 # throws exception
176 username, password, src_url = self.get_url_and_credentials(url)
139 177 try:
140 svnrepo.svnremoterepo(baseui, url).svn.uuid
178 svnremoterepo(safe_bytes(username), safe_bytes(password), safe_bytes(src_url)).svn().uuid
141 179 except Exception:
142 180 tb = traceback.format_exc()
143 181 log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
144 raise URLError(
145 '"%s" is not a valid Subversion source url.' % (url, ))
182 raise URLError(f'"{url}" is not a valid Subversion source url.')
146 183 return True
147 184
148 185 def is_path_valid_repository(self, wire, path):
149
150 186 # NOTE(marcink): short circuit the check for SVN repo
151 187 # the repos.open might be expensive to check, but we have one cheap
152 # pre condition that we can use, to check for 'format' file
153
188 # pre-condition that we can use, to check for 'format' file
154 189 if not os.path.isfile(os.path.join(path, 'format')):
155 190 return False
156 191
157 try:
158 svn.repos.open(path)
159 except svn.core.SubversionException:
160 tb = traceback.format_exc()
161 log.debug("Invalid Subversion path `%s`, tb: %s", path, tb)
162 return False
163 return True
192 cache_on, context_uid, repo_id = self._cache_on(wire)
193 region = self._region(wire)
194
195 @region.conditional_cache_on_arguments(condition=cache_on)
196 def _assert_correct_path(_context_uid, _repo_id, fast_check):
197
198 try:
199 svn.repos.open(path)
200 except svn.core.SubversionException:
201 tb = traceback.format_exc()
202 log.debug("Invalid Subversion path `%s`, tb: %s", path, tb)
203 return False
204 return True
205
206 return _assert_correct_path(context_uid, repo_id, True)
164 207
165 208 @reraise_safe_exceptions
166 209 def verify(self, wire,):
167 210 repo_path = wire['path']
168 211 if not self.is_path_valid_repository(wire, repo_path):
169 212 raise Exception(
170 "Path %s is not a valid Subversion repository." % repo_path)
213 f"Path {repo_path} is not a valid Subversion repository.")
171 214
172 215 cmd = ['svnadmin', 'info', repo_path]
173 216 stdout, stderr = subprocessio.run_command(cmd)
174 217 return stdout
175 218
219 @reraise_safe_exceptions
176 220 def lookup(self, wire, revision):
177 221 if revision not in [-1, None, 'HEAD']:
178 222 raise NotImplementedError
@@ -181,6 +225,7 b' class SvnRemote(RemoteBase):'
181 225 head = svn.fs.youngest_rev(fs_ptr)
182 226 return head
183 227
228 @reraise_safe_exceptions
184 229 def lookup_interval(self, wire, start_ts, end_ts):
185 230 repo = self._factory.repo(wire)
186 231 fsobj = svn.repos.fs(repo)
@@ -198,10 +243,12 b' class SvnRemote(RemoteBase):'
198 243 end_rev = svn.fs.youngest_rev(fsobj)
199 244 return start_rev, end_rev
200 245
246 @reraise_safe_exceptions
201 247 def revision_properties(self, wire, revision):
202 248
203 249 cache_on, context_uid, repo_id = self._cache_on(wire)
204 250 region = self._region(wire)
251
205 252 @region.conditional_cache_on_arguments(condition=cache_on)
206 253 def _revision_properties(_repo_id, _revision):
207 254 repo = self._factory.repo(wire)
@@ -228,7 +275,7 b' class SvnRemote(RemoteBase):'
228 275 removed = []
229 276
230 277 # TODO: CHANGE_ACTION_REPLACE: Figure out where it belongs
231 for path, change in editor.changes.iteritems():
278 for path, change in editor.changes.items():
232 279 # TODO: Decide what to do with directory nodes. Subversion can add
233 280 # empty directories.
234 281
@@ -243,7 +290,7 b' class SvnRemote(RemoteBase):'
243 290 removed.append(path)
244 291 else:
245 292 raise NotImplementedError(
246 "Action %s not supported on path %s" % (
293 "Action {} not supported on path {}".format(
247 294 change.action, path))
248 295
249 296 changes = {
@@ -257,6 +304,7 b' class SvnRemote(RemoteBase):'
257 304 def node_history(self, wire, path, revision, limit):
258 305 cache_on, context_uid, repo_id = self._cache_on(wire)
259 306 region = self._region(wire)
307
260 308 @region.conditional_cache_on_arguments(condition=cache_on)
261 309 def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit):
262 310 cross_copies = False
@@ -276,9 +324,11 b' class SvnRemote(RemoteBase):'
276 324 return history_revisions
277 325 return _assert_correct_path(context_uid, repo_id, path, revision, limit)
278 326
327 @reraise_safe_exceptions
279 328 def node_properties(self, wire, path, revision):
280 329 cache_on, context_uid, repo_id = self._cache_on(wire)
281 330 region = self._region(wire)
331
282 332 @region.conditional_cache_on_arguments(condition=cache_on)
283 333 def _node_properties(_repo_id, _path, _revision):
284 334 repo = self._factory.repo(wire)
@@ -288,7 +338,7 b' class SvnRemote(RemoteBase):'
288 338 return _node_properties(repo_id, path, revision)
289 339
290 340 def file_annotate(self, wire, path, revision):
291 abs_path = 'file://' + urllib.pathname2url(
341 abs_path = 'file://' + urllib.request.pathname2url(
292 342 vcspath.join(wire['path'], path))
293 343 file_uri = svn.core.svn_path_canonicalize(abs_path)
294 344
@@ -309,17 +359,19 b' class SvnRemote(RemoteBase):'
309 359 except svn.core.SubversionException as exc:
310 360 log.exception("Error during blame operation.")
311 361 raise Exception(
312 "Blame not supported or file does not exist at path %s. "
313 "Error %s." % (path, exc))
362 f"Blame not supported or file does not exist at path {path}. "
363 f"Error {exc}.")
314 364
315 return annotations
365 return BinaryEnvelope(annotations)
316 366
317 def get_node_type(self, wire, path, revision=None):
367 @reraise_safe_exceptions
368 def get_node_type(self, wire, revision=None, path=''):
318 369
319 370 cache_on, context_uid, repo_id = self._cache_on(wire)
320 371 region = self._region(wire)
372
321 373 @region.conditional_cache_on_arguments(condition=cache_on)
322 def _get_node_type(_repo_id, _path, _revision):
374 def _get_node_type(_repo_id, _revision, _path):
323 375 repo = self._factory.repo(wire)
324 376 fs_ptr = svn.repos.fs(repo)
325 377 if _revision is None:
@@ -327,12 +379,14 b' class SvnRemote(RemoteBase):'
327 379 root = svn.fs.revision_root(fs_ptr, _revision)
328 380 node = svn.fs.check_path(root, path)
329 381 return NODE_TYPE_MAPPING.get(node, None)
330 return _get_node_type(repo_id, path, revision)
382 return _get_node_type(repo_id, revision, path)
331 383
332 def get_nodes(self, wire, path, revision=None):
384 @reraise_safe_exceptions
385 def get_nodes(self, wire, revision=None, path=''):
333 386
334 387 cache_on, context_uid, repo_id = self._cache_on(wire)
335 388 region = self._region(wire)
389
336 390 @region.conditional_cache_on_arguments(condition=cache_on)
337 391 def _get_nodes(_repo_id, _path, _revision):
338 392 repo = self._factory.repo(wire)
@@ -342,27 +396,32 b' class SvnRemote(RemoteBase):'
342 396 root = svn.fs.revision_root(fsobj, _revision)
343 397 entries = svn.fs.dir_entries(root, path)
344 398 result = []
345 for entry_path, entry_info in entries.iteritems():
399 for entry_path, entry_info in entries.items():
346 400 result.append(
347 401 (entry_path, NODE_TYPE_MAPPING.get(entry_info.kind, None)))
348 402 return result
349 403 return _get_nodes(repo_id, path, revision)
350 404
351 def get_file_content(self, wire, path, rev=None):
405 @reraise_safe_exceptions
406 def get_file_content(self, wire, rev=None, path=''):
352 407 repo = self._factory.repo(wire)
353 408 fsobj = svn.repos.fs(repo)
409
354 410 if rev is None:
355 rev = svn.fs.youngest_revision(fsobj)
411 rev = svn.fs.youngest_rev(fsobj)
412
356 413 root = svn.fs.revision_root(fsobj, rev)
357 414 content = svn.core.Stream(svn.fs.file_contents(root, path))
358 return content.read()
415 return BytesEnvelope(content.read())
359 416
360 def get_file_size(self, wire, path, revision=None):
417 @reraise_safe_exceptions
418 def get_file_size(self, wire, revision=None, path=''):
361 419
362 420 cache_on, context_uid, repo_id = self._cache_on(wire)
363 421 region = self._region(wire)
422
364 423 @region.conditional_cache_on_arguments(condition=cache_on)
365 def _get_file_size(_repo_id, _path, _revision):
424 def _get_file_size(_repo_id, _revision, _path):
366 425 repo = self._factory.repo(wire)
367 426 fsobj = svn.repos.fs(repo)
368 427 if _revision is None:
@@ -370,24 +429,24 b' class SvnRemote(RemoteBase):'
370 429 root = svn.fs.revision_root(fsobj, _revision)
371 430 size = svn.fs.file_length(root, path)
372 431 return size
373 return _get_file_size(repo_id, path, revision)
432 return _get_file_size(repo_id, revision, path)
374 433
375 434 def create_repository(self, wire, compatible_version=None):
376 435 log.info('Creating Subversion repository in path "%s"', wire['path'])
377 436 self._factory.repo(wire, create=True,
378 437 compatible_version=compatible_version)
379 438
380 def get_url_and_credentials(self, src_url):
381 obj = urlparse.urlparse(src_url)
382 username = obj.username or None
383 password = obj.password or None
439 def get_url_and_credentials(self, src_url) -> tuple[str, str, str]:
440 obj = urllib.parse.urlparse(src_url)
441 username = obj.username or ''
442 password = obj.password or ''
384 443 return username, password, src_url
385 444
386 445 def import_remote_repository(self, wire, src_url):
387 446 repo_path = wire['path']
388 447 if not self.is_path_valid_repository(wire, repo_path):
389 448 raise Exception(
390 "Path %s is not a valid Subversion repository." % repo_path)
449 f"Path {repo_path} is not a valid Subversion repository.")
391 450
392 451 username, password, src_url = self.get_url_and_credentials(src_url)
393 452 rdump_cmd = ['svnrdump', 'dump', '--non-interactive',
@@ -411,25 +470,26 b' class SvnRemote(RemoteBase):'
411 470 log.debug('Return process ended with code: %s', rdump.returncode)
412 471 if rdump.returncode != 0:
413 472 errors = rdump.stderr.read()
414 log.error('svnrdump dump failed: statuscode %s: message: %s',
415 rdump.returncode, errors)
473 log.error('svnrdump dump failed: statuscode %s: message: %s', rdump.returncode, errors)
474
416 475 reason = 'UNKNOWN'
417 if 'svnrdump: E230001:' in errors:
476 if b'svnrdump: E230001:' in errors:
418 477 reason = 'INVALID_CERTIFICATE'
419 478
420 479 if reason == 'UNKNOWN':
421 reason = 'UNKNOWN:{}'.format(errors)
480 reason = f'UNKNOWN:{safe_str(errors)}'
481
422 482 raise Exception(
423 'Failed to dump the remote repository from %s. Reason:%s' % (
483 'Failed to dump the remote repository from {}. Reason:{}'.format(
424 484 src_url, reason))
425 485 if load.returncode != 0:
426 486 raise Exception(
427 'Failed to load the dump of remote repository from %s.' %
428 (src_url, ))
487 f'Failed to load the dump of remote repository from {src_url}.')
429 488
430 489 def commit(self, wire, message, author, timestamp, updated, removed):
431 assert isinstance(message, str)
432 assert isinstance(author, str)
490
491 message = safe_bytes(message)
492 author = safe_bytes(author)
433 493
434 494 repo = self._factory.repo(wire)
435 495 fsobj = svn.repos.fs(repo)
@@ -453,6 +513,7 b' class SvnRemote(RemoteBase):'
453 513 log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
454 514 return commit_id
455 515
516 @reraise_safe_exceptions
456 517 def diff(self, wire, rev1, rev2, path1=None, path2=None,
457 518 ignore_whitespace=False, context=3):
458 519
@@ -461,12 +522,12 b' class SvnRemote(RemoteBase):'
461 522 diff_creator = SvnDiffer(
462 523 repo, rev1, path1, rev2, path2, ignore_whitespace, context)
463 524 try:
464 return diff_creator.generate_diff()
525 return BytesEnvelope(diff_creator.generate_diff())
465 526 except svn.core.SubversionException as e:
466 527 log.exception(
467 528 "Error during diff operation operation. "
468 "Path might not exist %s, %s" % (path1, path2))
469 return ""
529 "Path might not exist %s, %s", path1, path2)
530 return BytesEnvelope(b'')
470 531
471 532 @reraise_safe_exceptions
472 533 def is_large_file(self, wire, path):
@@ -475,18 +536,32 b' class SvnRemote(RemoteBase):'
475 536 @reraise_safe_exceptions
476 537 def is_binary(self, wire, rev, path):
477 538 cache_on, context_uid, repo_id = self._cache_on(wire)
539 region = self._region(wire)
478 540
479 region = self._region(wire)
480 541 @region.conditional_cache_on_arguments(condition=cache_on)
481 542 def _is_binary(_repo_id, _rev, _path):
482 raw_bytes = self.get_file_content(wire, path, rev)
483 return raw_bytes and '\0' in raw_bytes
543 raw_bytes = self.get_file_content(wire, rev, path)
544 if not raw_bytes:
545 return False
546 return b'\0' in raw_bytes
484 547
485 548 return _is_binary(repo_id, rev, path)
486 549
487 550 @reraise_safe_exceptions
551 def md5_hash(self, wire, rev, path):
552 cache_on, context_uid, repo_id = self._cache_on(wire)
553 region = self._region(wire)
554
555 @region.conditional_cache_on_arguments(condition=cache_on)
556 def _md5_hash(_repo_id, _rev, _path):
557 return ''
558
559 return _md5_hash(repo_id, rev, path)
560
561 @reraise_safe_exceptions
488 562 def run_svn_command(self, wire, cmd, **opts):
489 563 path = wire.get('path', None)
564 debug_mode = rhodecode.ConfigGet().get_bool('debug')
490 565
491 566 if path and os.path.isdir(path):
492 567 opts['cwd'] = path
@@ -500,18 +575,22 b' class SvnRemote(RemoteBase):'
500 575
501 576 try:
502 577 _opts.update(opts)
503 p = subprocessio.SubprocessIOChunker(cmd, **_opts)
578 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
504 579
505 return ''.join(p), ''.join(p.error)
506 except (EnvironmentError, OSError) as err:
580 return b''.join(proc), b''.join(proc.stderr)
581 except OSError as err:
507 582 if safe_call:
508 583 return '', safe_str(err).strip()
509 584 else:
510 cmd = ' '.join(cmd) # human friendly CMD
511 tb_err = ("Couldn't run svn command (%s).\n"
512 "Original error was:%s\n"
513 "Call options:%s\n"
514 % (cmd, err, _opts))
585 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
586 call_opts = {}
587 if debug_mode:
588 call_opts = _opts
589
590 tb_err = ("Couldn't run svn command ({}).\n"
591 "Original error was:{}\n"
592 "Call options:{}\n"
593 .format(cmd, err, call_opts))
515 594 log.exception(tb_err)
516 595 raise exceptions.VcsException()(tb_err)
517 596
@@ -522,9 +601,8 b' class SvnRemote(RemoteBase):'
522 601 binary_dir = settings.BINARY_DIR
523 602 executable = None
524 603 if binary_dir:
525 executable = os.path.join(binary_dir, 'python')
526 return install_svn_hooks(
527 repo_path, executable=executable, force_create=force)
604 executable = os.path.join(binary_dir, 'python3')
605 return install_svn_hooks(repo_path, force_create=force)
528 606
529 607 @reraise_safe_exceptions
530 608 def get_hooks_info(self, wire):
@@ -541,13 +619,14 b' class SvnRemote(RemoteBase):'
541 619 pass
542 620
543 621 @reraise_safe_exceptions
544 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
545 archive_dir_name, commit_id):
622 def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path,
623 archive_dir_name, commit_id, cache_config):
546 624
547 625 def walk_tree(root, root_dir, _commit_id):
548 626 """
549 627 Special recursive svn repo walker
550 628 """
629 root_dir = safe_bytes(root_dir)
551 630
552 631 filemode_default = 0o100644
553 632 filemode_executable = 0o100755
@@ -560,10 +639,10 b' class SvnRemote(RemoteBase):'
560 639 # return only DIR, and then all entries in that dir
561 640 yield os.path.join(root_dir, f_name), {'mode': filemode_default}, f_type
562 641 new_root = os.path.join(root_dir, f_name)
563 for _f_name, _f_data, _f_type in walk_tree(root, new_root, _commit_id):
564 yield _f_name, _f_data, _f_type
642 yield from walk_tree(root, new_root, _commit_id)
565 643 else:
566 f_path = os.path.join(root_dir, f_name).rstrip('/')
644
645 f_path = os.path.join(root_dir, f_name).rstrip(b'/')
567 646 prop_list = svn.fs.node_proplist(root, f_path)
568 647
569 648 f_mode = filemode_default
@@ -601,11 +680,11 b' class SvnRemote(RemoteBase):'
601 680 data_stream = f_data['content_stream']
602 681 yield ArchiveNode(file_path, mode, is_link, data_stream)
603 682
604 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
605 archive_dir_name, commit_id)
683 return store_archive_in_cache(
684 file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config)
606 685
607 686
608 class SvnDiffer(object):
687 class SvnDiffer:
609 688 """
610 689 Utility to create diffs based on difflib and the Subversion api
611 690 """
@@ -643,15 +722,15 b' class SvnDiffer(object):'
643 722 "Source type: %s, target type: %s" %
644 723 (self.src_kind, self.tgt_kind))
645 724
646 def generate_diff(self):
647 buf = StringIO.StringIO()
725 def generate_diff(self) -> bytes:
726 buf = io.BytesIO()
648 727 if self.tgt_kind == svn.core.svn_node_dir:
649 728 self._generate_dir_diff(buf)
650 729 else:
651 730 self._generate_file_diff(buf)
652 731 return buf.getvalue()
653 732
654 def _generate_dir_diff(self, buf):
733 def _generate_dir_diff(self, buf: io.BytesIO):
655 734 editor = DiffChangeEditor()
656 735 editor_ptr, editor_baton = svn.delta.make_editor(editor)
657 736 svn.repos.dir_delta2(
@@ -672,7 +751,7 b' class SvnDiffer(object):'
672 751 self._generate_node_diff(
673 752 buf, change, path, self.tgt_path, path, self.src_path)
674 753
675 def _generate_file_diff(self, buf):
754 def _generate_file_diff(self, buf: io.BytesIO):
676 755 change = None
677 756 if self.src_kind == svn.core.svn_node_none:
678 757 change = "add"
@@ -684,7 +763,13 b' class SvnDiffer(object):'
684 763 buf, change, tgt_path, tgt_base, src_path, src_base)
685 764
686 765 def _generate_node_diff(
687 self, buf, change, tgt_path, tgt_base, src_path, src_base):
766 self, buf: io.BytesIO, change, tgt_path, tgt_base, src_path, src_base):
767
768 tgt_path_bytes = safe_bytes(tgt_path)
769 tgt_path = safe_str(tgt_path)
770
771 src_path_bytes = safe_bytes(src_path)
772 src_path = safe_str(src_path)
688 773
689 774 if self.src_rev == self.tgt_rev and tgt_base == src_base:
690 775 # makes consistent behaviour with git/hg to return empty diff if
@@ -697,55 +782,55 b' class SvnDiffer(object):'
697 782 self.binary_content = False
698 783 mime_type = self._get_mime_type(tgt_full_path)
699 784
700 if mime_type and not mime_type.startswith('text'):
785 if mime_type and not mime_type.startswith(b'text'):
701 786 self.binary_content = True
702 buf.write("=" * 67 + '\n')
703 buf.write("Cannot display: file marked as a binary type.\n")
704 buf.write("svn:mime-type = %s\n" % mime_type)
705 buf.write("Index: %s\n" % (tgt_path, ))
706 buf.write("=" * 67 + '\n')
707 buf.write("diff --git a/%(tgt_path)s b/%(tgt_path)s\n" % {
708 'tgt_path': tgt_path})
787 buf.write(b"=" * 67 + b'\n')
788 buf.write(b"Cannot display: file marked as a binary type.\n")
789 buf.write(b"svn:mime-type = %s\n" % mime_type)
790 buf.write(b"Index: %b\n" % tgt_path_bytes)
791 buf.write(b"=" * 67 + b'\n')
792 buf.write(b"diff --git a/%b b/%b\n" % (tgt_path_bytes, tgt_path_bytes))
709 793
710 794 if change == 'add':
711 795 # TODO: johbo: SVN is missing a zero here compared to git
712 buf.write("new file mode 10644\n")
796 buf.write(b"new file mode 10644\n")
797
798 # TODO(marcink): intro to binary detection of svn patches
799 # if self.binary_content:
800 # buf.write(b'GIT binary patch\n')
713 801
714 #TODO(marcink): intro to binary detection of svn patches
802 buf.write(b"--- /dev/null\t(revision 0)\n")
803 src_lines = []
804 else:
805 if change == 'delete':
806 buf.write(b"deleted file mode 10644\n")
807
808 # TODO(marcink): intro to binary detection of svn patches
715 809 # if self.binary_content:
716 810 # buf.write('GIT binary patch\n')
717 811
718 buf.write("--- /dev/null\t(revision 0)\n")
719 src_lines = []
720 else:
721 if change == 'delete':
722 buf.write("deleted file mode 10644\n")
723
724 #TODO(marcink): intro to binary detection of svn patches
725 # if self.binary_content:
726 # buf.write('GIT binary patch\n')
727
728 buf.write("--- a/%s\t(revision %s)\n" % (
729 src_path, self.src_rev))
812 buf.write(b"--- a/%b\t(revision %d)\n" % (src_path_bytes, self.src_rev))
730 813 src_lines = self._svn_readlines(self.src_root, src_full_path)
731 814
732 815 if change == 'delete':
733 buf.write("+++ /dev/null\t(revision %s)\n" % (self.tgt_rev, ))
816 buf.write(b"+++ /dev/null\t(revision %d)\n" % self.tgt_rev)
734 817 tgt_lines = []
735 818 else:
736 buf.write("+++ b/%s\t(revision %s)\n" % (
737 tgt_path, self.tgt_rev))
819 buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path_bytes, self.tgt_rev))
738 820 tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)
739 821
822 # we made our diff header, time to generate the diff content into our buffer
823
740 824 if not self.binary_content:
741 825 udiff = svn_diff.unified_diff(
742 826 src_lines, tgt_lines, context=self.context,
743 827 ignore_blank_lines=self.ignore_whitespace,
744 828 ignore_case=False,
745 829 ignore_space_changes=self.ignore_whitespace)
830
746 831 buf.writelines(udiff)
747 832
748 def _get_mime_type(self, path):
833 def _get_mime_type(self, path) -> bytes:
749 834 try:
750 835 mime_type = svn.fs.node_prop(
751 836 self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
@@ -761,7 +846,9 b' class SvnDiffer(object):'
761 846 if node_kind not in (
762 847 svn.core.svn_node_file, svn.core.svn_node_symlink):
763 848 return []
764 content = svn.core.Stream(svn.fs.file_contents(fs_root, node_path)).read()
849 content = svn.core.Stream(
850 svn.fs.file_contents(fs_root, node_path)).read()
851
765 852 return content.splitlines(True)
766 853
767 854
@@ -789,7 +876,7 b' def authorization_callback_allow_all(roo'
789 876 return True
790 877
791 878
792 class TxnNodeProcessor(object):
879 class TxnNodeProcessor:
793 880 """
794 881 Utility to process the change of one node within a transaction root.
795 882
@@ -799,7 +886,7 b' class TxnNodeProcessor(object):'
799 886 """
800 887
801 888 def __init__(self, node, txn_root):
802 assert isinstance(node['path'], str)
889 assert_bytes(node['path'])
803 890
804 891 self.node = node
805 892 self.txn_root = txn_root
@@ -835,23 +922,24 b' class TxnNodeProcessor(object):'
835 922 svn.fs.make_file(self.txn_root, self.node['path'])
836 923
837 924 def _update_file_content(self):
838 assert isinstance(self.node['content'], str)
925 assert_bytes(self.node['content'])
926
839 927 handler, baton = svn.fs.apply_textdelta(
840 928 self.txn_root, self.node['path'], None, None)
841 929 svn.delta.svn_txdelta_send_string(self.node['content'], handler, baton)
842 930
843 931 def _update_file_properties(self):
844 932 properties = self.node.get('properties', {})
845 for key, value in properties.iteritems():
933 for key, value in properties.items():
846 934 svn.fs.change_node_prop(
847 self.txn_root, self.node['path'], key, value)
935 self.txn_root, self.node['path'], safe_bytes(key), safe_bytes(value))
848 936
849 937
850 938 def apr_time_t(timestamp):
851 939 """
852 940 Convert a Python timestamp into APR timestamp type apr_time_t
853 941 """
854 return timestamp * 1E6
942 return int(timestamp * 1E6)
855 943
856 944
857 945 def svn_opt_revision_value_t(num):
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 b''
18 18 from vcsserver import scm_app, wsgi_app_caller
19 19
20 20
21 class GitRemoteWsgi(object):
21 class GitRemoteWsgi:
22 22 def handle(self, environ, input_data, *args, **kwargs):
23 23 app = wsgi_app_caller.WSGIAppCaller(
24 24 scm_app.create_git_wsgi_app(*args, **kwargs))
@@ -26,7 +26,7 b' class GitRemoteWsgi(object):'
26 26 return app.handle(environ, input_data)
27 27
28 28
29 class HgRemoteWsgi(object):
29 class HgRemoteWsgi:
30 30 def handle(self, environ, input_data, *args, **kwargs):
31 31 app = wsgi_app_caller.WSGIAppCaller(
32 32 scm_app.create_hg_wsgi_app(*args, **kwargs))
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -27,7 +27,7 b' import mercurial.hgweb.hgweb_mod'
27 27 import webob.exc
28 28
29 29 from vcsserver import pygrack, exceptions, settings, git_lfs
30
30 from vcsserver.str_utils import ascii_bytes, safe_bytes
31 31
32 32 log = logging.getLogger(__name__)
33 33
@@ -81,7 +81,7 b' class HgWeb(mercurial.hgweb.hgweb_mod.hg'
81 81 first_chunk = None
82 82
83 83 try:
84 data = gen.next()
84 data = next(gen)
85 85
86 86 def first_chunk():
87 87 yield data
@@ -94,16 +94,29 b' class HgWeb(mercurial.hgweb.hgweb_mod.hg'
94 94
95 95 def _runwsgi(self, req, res, repo):
96 96
97 cmd = req.qsparams.get('cmd', '')
97 cmd = req.qsparams.get(b'cmd', '')
98 98 if not mercurial.wireprotoserver.iscmd(cmd):
99 99 # NOTE(marcink): for unsupported commands, we return bad request
100 100 # internally from HG
101 log.warning('cmd: `%s` is not supported by the mercurial wireprotocol v1', cmd)
101 102 from mercurial.hgweb.common import statusmessage
102 103 res.status = statusmessage(mercurial.hgweb.common.HTTP_BAD_REQUEST)
103 res.setbodybytes('')
104 res.setbodybytes(b'')
104 105 return res.sendresponse()
105 106
106 return super(HgWeb, self)._runwsgi(req, res, repo)
107 return super()._runwsgi(req, res, repo)
108
109
110 def sanitize_hg_ui(baseui):
111 # NOTE(marcink): since python3 hgsubversion is deprecated.
112 # From old installations we might still have this set enabled
113 # we explicitly remove this now here to make sure it wont propagate further
114
115 if baseui.config(b'extensions', b'hgsubversion') is not None:
116 for cfg in (baseui._ocfg, baseui._tcfg, baseui._ucfg):
117 if b'extensions' in cfg:
118 if b'hgsubversion' in cfg[b'extensions']:
119 del cfg[b'extensions'][b'hgsubversion']
107 120
108 121
109 122 def make_hg_ui_from_config(repo_config):
@@ -115,10 +128,13 b' def make_hg_ui_from_config(repo_config):'
115 128 baseui._tcfg = mercurial.config.config()
116 129
117 130 for section, option, value in repo_config:
118 baseui.setconfig(section, option, value)
131 baseui.setconfig(
132 ascii_bytes(section, allow_bytes=True),
133 ascii_bytes(option, allow_bytes=True),
134 ascii_bytes(value, allow_bytes=True))
119 135
120 136 # make our hgweb quiet so it doesn't print output
121 baseui.setconfig('ui', 'quiet', 'true')
137 baseui.setconfig(b'ui', b'quiet', b'true')
122 138
123 139 return baseui
124 140
@@ -131,11 +147,14 b' def update_hg_ui_from_hgrc(baseui, repo_'
131 147 return
132 148 log.debug('reading hgrc from %s', path)
133 149 cfg = mercurial.config.config()
134 cfg.read(path)
150 cfg.read(ascii_bytes(path))
135 151 for section in HG_UI_SECTIONS:
136 152 for k, v in cfg.items(section):
137 153 log.debug('settings ui from file: [%s] %s=%s', section, k, v)
138 baseui.setconfig(section, k, v)
154 baseui.setconfig(
155 ascii_bytes(section, allow_bytes=True),
156 ascii_bytes(k, allow_bytes=True),
157 ascii_bytes(v, allow_bytes=True))
139 158
140 159
141 160 def create_hg_wsgi_app(repo_path, repo_name, config):
@@ -149,14 +168,15 b' def create_hg_wsgi_app(repo_path, repo_n'
149 168
150 169 baseui = make_hg_ui_from_config(config)
151 170 update_hg_ui_from_hgrc(baseui, repo_path)
171 sanitize_hg_ui(baseui)
152 172
153 173 try:
154 return HgWeb(repo_path, name=repo_name, baseui=baseui)
174 return HgWeb(safe_bytes(repo_path), name=safe_bytes(repo_name), baseui=baseui)
155 175 except mercurial.error.RequirementError as e:
156 176 raise exceptions.RequirementException(e)(e)
157 177
158 178
159 class GitHandler(object):
179 class GitHandler:
160 180 """
161 181 Handler for Git operations like push/pull etc
162 182 """
@@ -202,7 +222,7 b' def create_git_wsgi_app(repo_path, repo_'
202 222 return app
203 223
204 224
205 class GitLFSHandler(object):
225 class GitLFSHandler:
206 226 """
207 227 Handler for Git LFS operations
208 228 """
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -24,7 +24,7 b' import time'
24 24 log = logging.getLogger(__name__)
25 25
26 26
27 class VcsServer(object):
27 class VcsServer:
28 28 """
29 29 Exposed remote interface of the vcsserver itself.
30 30
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -23,15 +23,17 b' along with git_http_backend.py Project.'
23 23 If not, see <http://www.gnu.org/licenses/>.
24 24 """
25 25 import os
26 import collections
26 27 import logging
27 import subprocess32 as subprocess
28 from collections import deque
29 from threading import Event, Thread
28 import subprocess
29 import threading
30
31 from vcsserver.str_utils import safe_str
30 32
31 33 log = logging.getLogger(__name__)
32 34
33 35
34 class StreamFeeder(Thread):
36 class StreamFeeder(threading.Thread):
35 37 """
36 38 Normal writing into pipe-like is blocking once the buffer is filled.
37 39 This thread allows a thread to seep data from a file-like into a pipe
@@ -40,24 +42,18 b' class StreamFeeder(Thread):'
40 42 """
41 43
42 44 def __init__(self, source):
43 super(StreamFeeder, self).__init__()
45 super().__init__()
44 46 self.daemon = True
45 47 filelike = False
46 self.bytes = bytes()
47 if type(source) in (type(''), bytes, bytearray): # string-like
48 self.bytes = b''
49 if type(source) in (str, bytes, bytearray): # string-like
48 50 self.bytes = bytes(source)
49 51 else: # can be either file pointer or file-like
50 if type(source) in (int, long): # file pointer it is
52 if isinstance(source, int): # file pointer it is
51 53 # converting file descriptor (int) stdin into file-like
52 try:
53 source = os.fdopen(source, 'rb', 16384)
54 except Exception:
55 pass
54 source = os.fdopen(source, 'rb', 16384)
56 55 # let's see if source is file-like by now
57 try:
58 filelike = source.read
59 except Exception:
60 pass
56 filelike = hasattr(source, 'read')
61 57 if not filelike and not self.bytes:
62 58 raise TypeError("StreamFeeder's source object must be a readable "
63 59 "file-like, a file descriptor, or a string-like.")
@@ -65,28 +61,31 b' class StreamFeeder(Thread):'
65 61 self.readiface, self.writeiface = os.pipe()
66 62
67 63 def run(self):
68 t = self.writeiface
64 writer = self.writeiface
69 65 try:
70 66 if self.bytes:
71 os.write(t, self.bytes)
67 os.write(writer, self.bytes)
72 68 else:
73 69 s = self.source
74 b = s.read(4096)
75 while b:
76 os.write(t, b)
77 b = s.read(4096)
70
71 while 1:
72 _bytes = s.read(4096)
73 if not _bytes:
74 break
75 os.write(writer, _bytes)
76
78 77 finally:
79 os.close(t)
78 os.close(writer)
80 79
81 80 @property
82 81 def output(self):
83 82 return self.readiface
84 83
85 84
86 class InputStreamChunker(Thread):
85 class InputStreamChunker(threading.Thread):
87 86 def __init__(self, source, target, buffer_size, chunk_size):
88 87
89 super(InputStreamChunker, self).__init__()
88 super().__init__()
90 89
91 90 self.daemon = True # die die die.
92 91
@@ -95,16 +94,16 b' class InputStreamChunker(Thread):'
95 94 self.chunk_count_max = int(buffer_size / chunk_size) + 1
96 95 self.chunk_size = chunk_size
97 96
98 self.data_added = Event()
97 self.data_added = threading.Event()
99 98 self.data_added.clear()
100 99
101 self.keep_reading = Event()
100 self.keep_reading = threading.Event()
102 101 self.keep_reading.set()
103 102
104 self.EOF = Event()
103 self.EOF = threading.Event()
105 104 self.EOF.clear()
106 105
107 self.go = Event()
106 self.go = threading.Event()
108 107 self.go.set()
109 108
110 109 def stop(self):
@@ -115,7 +114,7 b' class InputStreamChunker(Thread):'
115 114 # go of the input because, if successful, .close() will send EOF
116 115 # down the pipe.
117 116 self.source.close()
118 except:
117 except Exception:
119 118 pass
120 119
121 120 def run(self):
@@ -146,14 +145,14 b' class InputStreamChunker(Thread):'
146 145
147 146 try:
148 147 b = s.read(cs)
149 except ValueError:
148 except ValueError: # probably "I/O operation on closed file"
150 149 b = ''
151 150
152 151 self.EOF.set()
153 152 da.set() # for cases when done but there was no input.
154 153
155 154
156 class BufferedGenerator(object):
155 class BufferedGenerator:
157 156 """
158 157 Class behaves as a non-blocking, buffered pipe reader.
159 158 Reads chunks of data (through a thread)
@@ -166,18 +165,20 b' class BufferedGenerator(object):'
166 165 StopIteration after the last chunk of data is yielded.
167 166 """
168 167
169 def __init__(self, source, buffer_size=65536, chunk_size=4096,
168 def __init__(self, name, source, buffer_size=65536, chunk_size=4096,
170 169 starting_values=None, bottomless=False):
171 170 starting_values = starting_values or []
171 self.name = name
172 self.buffer_size = buffer_size
173 self.chunk_size = chunk_size
172 174
173 175 if bottomless:
174 176 maxlen = int(buffer_size / chunk_size)
175 177 else:
176 178 maxlen = None
177 179
178 self.data = deque(starting_values, maxlen)
179 self.worker = InputStreamChunker(source, self.data, buffer_size,
180 chunk_size)
180 self.data_queue = collections.deque(starting_values, maxlen)
181 self.worker = InputStreamChunker(source, self.data_queue, buffer_size, chunk_size)
181 182 if starting_values:
182 183 self.worker.data_added.set()
183 184 self.worker.start()
@@ -185,17 +186,21 b' class BufferedGenerator(object):'
185 186 ####################
186 187 # Generator's methods
187 188 ####################
189 def __str__(self):
190 return f'BufferedGenerator(name={self.name} chunk: {self.chunk_size} on buffer: {self.buffer_size})'
188 191
189 192 def __iter__(self):
190 193 return self
191 194
192 def next(self):
193 while not len(self.data) and not self.worker.EOF.is_set():
195 def __next__(self):
196
197 while not self.length and not self.worker.EOF.is_set():
194 198 self.worker.data_added.clear()
195 199 self.worker.data_added.wait(0.2)
196 if len(self.data):
200
201 if self.length:
197 202 self.worker.keep_reading.set()
198 return bytes(self.data.popleft())
203 return bytes(self.data_queue.popleft())
199 204 elif self.worker.EOF.is_set():
200 205 raise StopIteration
201 206
@@ -249,7 +254,7 b' class BufferedGenerator(object):'
249 254 @property
250 255 def done_reading(self):
251 256 """
252 Done_reding does not mean that the iterator's buffer is empty.
257 Done_reading does not mean that the iterator's buffer is empty.
253 258 Iterator might have done reading from underlying source, but the read
254 259 chunks might still be available for serving through .next() method.
255 260
@@ -262,34 +267,34 b' class BufferedGenerator(object):'
262 267 """
263 268 returns int.
264 269
265 This is the lenght of the que of chunks, not the length of
270 This is the length of the queue of chunks, not the length of
266 271 the combined contents in those chunks.
267 272
268 273 __len__() cannot be meaningfully implemented because this
269 reader is just flying throuh a bottomless pit content and
270 can only know the lenght of what it already saw.
274 reader is just flying through a bottomless pit content and
275 can only know the length of what it already saw.
271 276
272 277 If __len__() on WSGI server per PEP 3333 returns a value,
273 the responce's length will be set to that. In order not to
278 the response's length will be set to that. In order not to
274 279 confuse WSGI PEP3333 servers, we will not implement __len__
275 280 at all.
276 281 """
277 return len(self.data)
282 return len(self.data_queue)
278 283
279 284 def prepend(self, x):
280 self.data.appendleft(x)
285 self.data_queue.appendleft(x)
281 286
282 287 def append(self, x):
283 self.data.append(x)
288 self.data_queue.append(x)
284 289
285 290 def extend(self, o):
286 self.data.extend(o)
291 self.data_queue.extend(o)
287 292
288 293 def __getitem__(self, i):
289 return self.data[i]
294 return self.data_queue[i]
290 295
291 296
292 class SubprocessIOChunker(object):
297 class SubprocessIOChunker:
293 298 """
294 299 Processor class wrapping handling of subprocess IO.
295 300
@@ -314,7 +319,7 b' class SubprocessIOChunker(object):'
314 319
315 320 - We are multithreaded. Writing in and reading out, err are all sep threads.
316 321 - We support concurrent (in and out) stream processing.
317 - The output is not a stream. It's a queue of read string (bytes, not unicode)
322 - The output is not a stream. It's a queue of read string (bytes, not str)
318 323 chunks. The object behaves as an iterable. You can "for chunk in obj:" us.
319 324 - We are non-blocking in more respects than communicate()
320 325 (reading from subprocess out pauses when internal buffer is full, but
@@ -323,16 +328,16 b' class SubprocessIOChunker(object):'
323 328 does not block the parallel inpipe reading occurring parallel thread.)
324 329
325 330 The purpose of the object is to allow us to wrap subprocess interactions into
326 and interable that can be passed to a WSGI server as the application's return
331 an iterable that can be passed to a WSGI server as the application's return
327 332 value. Because of stream-processing-ability, WSGI does not have to read ALL
328 333 of the subprocess's output and buffer it, before handing it to WSGI server for
329 334 HTTP response. Instead, the class initializer reads just a bit of the stream
330 to figure out if error ocurred or likely to occur and if not, just hands the
335 to figure out if error occurred or likely to occur and if not, just hands the
331 336 further iteration over subprocess output to the server for completion of HTTP
332 337 response.
333 338
334 339 The real or perceived subprocess error is trapped and raised as one of
335 EnvironmentError family of exceptions
340 OSError family of exceptions
336 341
337 342 Example usage:
338 343 # try:
@@ -342,7 +347,7 b' class SubprocessIOChunker(object):'
342 347 # buffer_size = 65536,
343 348 # chunk_size = 4096
344 349 # )
345 # except (EnvironmentError) as e:
350 # except (OSError) as e:
346 351 # print str(e)
347 352 # raise e
348 353 #
@@ -358,15 +363,17 b' class SubprocessIOChunker(object):'
358 363 _close_input_fd = None
359 364
360 365 _closed = False
366 _stdout = None
367 _stderr = None
361 368
362 def __init__(self, cmd, inputstream=None, buffer_size=65536,
369 def __init__(self, cmd, input_stream=None, buffer_size=65536,
363 370 chunk_size=4096, starting_values=None, fail_on_stderr=True,
364 371 fail_on_return_code=True, **kwargs):
365 372 """
366 373 Initializes SubprocessIOChunker
367 374
368 375 :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
369 :param inputstream: (Default: None) A file-like, string, or file pointer.
376 :param input_stream: (Default: None) A file-like, string, or file pointer.
370 377 :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
371 378 :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
372 379 :param starting_values: (Default: []) An array of strings to put in front of output que.
@@ -376,71 +383,86 b' class SubprocessIOChunker(object):'
376 383 exception if the return code is not 0.
377 384 """
378 385
386 kwargs['shell'] = kwargs.get('shell', True)
387
379 388 starting_values = starting_values or []
380 if inputstream:
381 input_streamer = StreamFeeder(inputstream)
389 if input_stream:
390 input_streamer = StreamFeeder(input_stream)
382 391 input_streamer.start()
383 inputstream = input_streamer.output
384 self._close_input_fd = inputstream
392 input_stream = input_streamer.output
393 self._close_input_fd = input_stream
385 394
386 395 self._fail_on_stderr = fail_on_stderr
387 396 self._fail_on_return_code = fail_on_return_code
388
389 _shell = kwargs.get('shell', True)
390 kwargs['shell'] = _shell
397 self.cmd = cmd
391 398
392 _p = subprocess.Popen(cmd, bufsize=-1,
393 stdin=inputstream,
394 stdout=subprocess.PIPE,
395 stderr=subprocess.PIPE,
399 _p = subprocess.Popen(cmd, bufsize=-1, stdin=input_stream, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
396 400 **kwargs)
401 self.process = _p
397 402
398 bg_out = BufferedGenerator(_p.stdout, buffer_size, chunk_size,
399 starting_values)
400 bg_err = BufferedGenerator(_p.stderr, 16000, 1, bottomless=True)
403 bg_out = BufferedGenerator('stdout', _p.stdout, buffer_size, chunk_size, starting_values)
404 bg_err = BufferedGenerator('stderr', _p.stderr, 10240, 1, bottomless=True)
401 405
402 406 while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length:
403 407 # doing this until we reach either end of file, or end of buffer.
404 bg_out.data_added_event.wait(1)
408 bg_out.data_added_event.wait(0.2)
405 409 bg_out.data_added_event.clear()
406 410
407 411 # at this point it's still ambiguous if we are done reading or just full buffer.
408 412 # Either way, if error (returned by ended process, or implied based on
409 413 # presence of stuff in stderr output) we error out.
410 414 # Else, we are happy.
411 _returncode = _p.poll()
415 return_code = _p.poll()
416 ret_code_ok = return_code in [None, 0]
417 ret_code_fail = return_code is not None and return_code != 0
418 if (
419 (ret_code_fail and fail_on_return_code) or
420 (ret_code_ok and fail_on_stderr and bg_err.length)
421 ):
412 422
413 if ((_returncode and fail_on_return_code) or
414 (fail_on_stderr and _returncode is None and bg_err.length)):
415 423 try:
416 424 _p.terminate()
417 425 except Exception:
418 426 pass
427
419 428 bg_out.stop()
429 out = b''.join(bg_out)
430 self._stdout = out
431
420 432 bg_err.stop()
421 if fail_on_stderr:
422 err = ''.join(bg_err)
423 raise EnvironmentError(
424 "Subprocess exited due to an error:\n" + err)
425 if _returncode and fail_on_return_code:
426 err = ''.join(bg_err)
433 err = b''.join(bg_err)
434 self._stderr = err
435
436 # code from https://github.com/schacon/grack/pull/7
437 if err.strip() == b'fatal: The remote end hung up unexpectedly' and out.startswith(b'0034shallow '):
438 bg_out = iter([out])
439 _p = None
440 elif err and fail_on_stderr:
441 text_err = err.decode()
442 raise OSError(
443 f"Subprocess exited due to an error:\n{text_err}")
444
445 if ret_code_fail and fail_on_return_code:
446 text_err = err.decode()
427 447 if not err:
428 448 # maybe get empty stderr, try stdout instead
429 449 # in many cases git reports the errors on stdout too
430 err = ''.join(bg_out)
431 raise EnvironmentError(
432 "Subprocess exited with non 0 ret code:%s: stderr:%s" % (
433 _returncode, err))
450 text_err = out.decode()
451 raise OSError(
452 f"Subprocess exited with non 0 ret code:{return_code}: stderr:{text_err}")
434 453
435 self.process = _p
436 self.output = bg_out
437 self.error = bg_err
438 self.inputstream = inputstream
454 self.stdout = bg_out
455 self.stderr = bg_err
456 self.inputstream = input_stream
457
458 def __str__(self):
459 proc = getattr(self, 'process', 'NO_PROCESS')
460 return f'SubprocessIOChunker: {proc}'
439 461
440 462 def __iter__(self):
441 463 return self
442 464
443 def next(self):
465 def __next__(self):
444 466 # Note: mikhail: We need to be sure that we are checking the return
445 467 # code after the stdout stream is closed. Some processes, e.g. git
446 468 # are doing some magic in between closing stdout and terminating the
@@ -449,27 +471,31 b' class SubprocessIOChunker(object):'
449 471 result = None
450 472 stop_iteration = None
451 473 try:
452 result = self.output.next()
474 result = next(self.stdout)
453 475 except StopIteration as e:
454 476 stop_iteration = e
455 477
456 if self.process.poll() and self._fail_on_return_code:
457 err = '%s' % ''.join(self.error)
458 raise EnvironmentError(
459 "Subprocess exited due to an error:\n" + err)
478 if self.process:
479 return_code = self.process.poll()
480 ret_code_fail = return_code is not None and return_code != 0
481 if ret_code_fail and self._fail_on_return_code:
482 self.stop_streams()
483 err = self.get_stderr()
484 raise OSError(
485 f"Subprocess exited (exit_code:{return_code}) due to an error during iteration:\n{err}")
460 486
461 487 if stop_iteration:
462 488 raise stop_iteration
463 489 return result
464 490
465 def throw(self, type, value=None, traceback=None):
466 if self.output.length or not self.output.done_reading:
467 raise type(value)
491 def throw(self, exc_type, value=None, traceback=None):
492 if self.stdout.length or not self.stdout.done_reading:
493 raise exc_type(value)
468 494
469 495 def close(self):
470 496 if self._closed:
471 497 return
472 self._closed = True
498
473 499 try:
474 500 self.process.terminate()
475 501 except Exception:
@@ -477,11 +503,11 b' class SubprocessIOChunker(object):'
477 503 if self._close_input_fd:
478 504 os.close(self._close_input_fd)
479 505 try:
480 self.output.close()
506 self.stdout.close()
481 507 except Exception:
482 508 pass
483 509 try:
484 self.error.close()
510 self.stderr.close()
485 511 except Exception:
486 512 pass
487 513 try:
@@ -489,6 +515,24 b' class SubprocessIOChunker(object):'
489 515 except Exception:
490 516 pass
491 517
518 self._closed = True
519
520 def stop_streams(self):
521 getattr(self.stdout, 'stop', lambda: None)()
522 getattr(self.stderr, 'stop', lambda: None)()
523
524 def get_stdout(self):
525 if self._stdout:
526 return self._stdout
527 else:
528 return b''.join(self.stdout)
529
530 def get_stderr(self):
531 if self._stderr:
532 return self._stderr
533 else:
534 return b''.join(self.stderr)
535
492 536
493 537 def run_command(arguments, env=None):
494 538 """
@@ -506,9 +550,9 b' def run_command(arguments, env=None):'
506 550 if env:
507 551 _opts.update({'env': env})
508 552 proc = SubprocessIOChunker(cmd, **_opts)
509 return ''.join(proc), ''.join(proc.error)
510 except (EnvironmentError, OSError) as err:
511 cmd = ' '.join(cmd) # human friendly CMD
553 return b''.join(proc), b''.join(proc.stderr)
554 except OSError as err:
555 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
512 556 tb_err = ("Couldn't run subprocessio command (%s).\n"
513 557 "Original error was:%s\n" % (cmd, err))
514 558 log.exception(tb_err)
@@ -1,7 +1,7 b''
1 # -*- coding: utf-8 -*-
2 1 #
3 2 # Copyright (C) 2004-2009 Edgewall Software
4 3 # Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de>
4 # Copyright (C) 2014-2023 RhodeCode GmbH
5 5 # All rights reserved.
6 6 #
7 7 # This software is licensed as described in the file COPYING, which
@@ -17,15 +17,15 b''
17 17 import difflib
18 18
19 19
20 def get_filtered_hunks(fromlines, tolines, context=None,
21 ignore_blank_lines=False, ignore_case=False,
22 ignore_space_changes=False):
20 def get_filtered_hunks(from_lines, to_lines, context=None,
21 ignore_blank_lines: bool = False, ignore_case: bool = False,
22 ignore_space_changes: bool = False):
23 23 """Retrieve differences in the form of `difflib.SequenceMatcher`
24 24 opcodes, grouped according to the ``context`` and ``ignore_*``
25 25 parameters.
26 26
27 :param fromlines: list of lines corresponding to the old content
28 :param tolines: list of lines corresponding to the new content
27 :param from_lines: list of lines corresponding to the old content
28 :param to_lines: list of lines corresponding to the new content
29 29 :param ignore_blank_lines: differences about empty lines only are ignored
30 30 :param ignore_case: upper case / lower case only differences are ignored
31 31 :param ignore_space_changes: differences in amount of spaces are ignored
@@ -37,27 +37,27 b' def get_filtered_hunks(fromlines, toline'
37 37 to filter out the results will come straight from the
38 38 SequenceMatcher.
39 39 """
40 hunks = get_hunks(fromlines, tolines, context)
40 hunks = get_hunks(from_lines, to_lines, context)
41 41 if ignore_space_changes or ignore_case or ignore_blank_lines:
42 hunks = filter_ignorable_lines(hunks, fromlines, tolines, context,
42 hunks = filter_ignorable_lines(hunks, from_lines, to_lines, context,
43 43 ignore_blank_lines, ignore_case,
44 44 ignore_space_changes)
45 45 return hunks
46 46
47 47
48 def get_hunks(fromlines, tolines, context=None):
48 def get_hunks(from_lines, to_lines, context=None):
49 49 """Generator yielding grouped opcodes describing differences .
50 50
51 51 See `get_filtered_hunks` for the parameter descriptions.
52 52 """
53 matcher = difflib.SequenceMatcher(None, fromlines, tolines)
53 matcher = difflib.SequenceMatcher(None, from_lines, to_lines)
54 54 if context is None:
55 55 return (hunk for hunk in [matcher.get_opcodes()])
56 56 else:
57 57 return matcher.get_grouped_opcodes(context)
58 58
59 59
60 def filter_ignorable_lines(hunks, fromlines, tolines, context,
60 def filter_ignorable_lines(hunks, from_lines, to_lines, context,
61 61 ignore_blank_lines, ignore_case,
62 62 ignore_space_changes):
63 63 """Detect line changes that should be ignored and emits them as
@@ -67,11 +67,12 b' def filter_ignorable_lines(hunks, fromli'
67 67 See `get_filtered_hunks` for the parameter descriptions.
68 68 """
69 69 def is_ignorable(tag, fromlines, tolines):
70
70 71 if tag == 'delete' and ignore_blank_lines:
71 if ''.join(fromlines) == '':
72 if b''.join(fromlines) == b'':
72 73 return True
73 74 elif tag == 'insert' and ignore_blank_lines:
74 if ''.join(tolines) == '':
75 if b''.join(tolines) == b'':
75 76 return True
76 77 elif tag == 'replace' and (ignore_case or ignore_space_changes):
77 78 if len(fromlines) != len(tolines):
@@ -81,7 +82,7 b' def filter_ignorable_lines(hunks, fromli'
81 82 if ignore_case:
82 83 input_str = input_str.lower()
83 84 if ignore_space_changes:
84 input_str = ' '.join(input_str.split())
85 input_str = b' '.join(input_str.split())
85 86 return input_str
86 87
87 88 for i in range(len(fromlines)):
@@ -101,7 +102,7 b' def filter_ignorable_lines(hunks, fromli'
101 102 else:
102 103 prev = (tag, i1, i2, j1, j2)
103 104 else:
104 if is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]):
105 if is_ignorable(tag, from_lines[i1:i2], to_lines[j1:j2]):
105 106 ignored_lines = True
106 107 if prev:
107 108 prev = 'equal', prev[1], i2, prev[3], j2
@@ -125,10 +126,11 b' def filter_ignorable_lines(hunks, fromli'
125 126 nn = n + n
126 127
127 128 group = []
129
128 130 def all_equal():
129 131 all(op[0] == 'equal' for op in group)
130 132 for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
131 if idx == 0 and tag == 'equal': # Fixup leading unchanged block
133 if idx == 0 and tag == 'equal': # Fixup leading unchanged block
132 134 i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
133 135 elif tag == 'equal' and i2 - i1 > nn:
134 136 group.append((tag, i1, min(i2, i1 + n), j1,
@@ -140,7 +142,7 b' def filter_ignorable_lines(hunks, fromli'
140 142 group.append((tag, i1, i2, j1, j2))
141 143
142 144 if group and not (len(group) == 1 and group[0][0] == 'equal'):
143 if group[-1][0] == 'equal': # Fixup trailing unchanged block
145 if group[-1][0] == 'equal': # Fixup trailing unchanged block
144 146 tag, i1, i2, j1, j2 = group[-1]
145 147 group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
146 148 if not all_equal():
@@ -150,22 +152,30 b' def filter_ignorable_lines(hunks, fromli'
150 152 yield hunk
151 153
152 154
153 NO_NEWLINE_AT_END = '\\ No newline at end of file'
155 NO_NEWLINE_AT_END = b'\\ No newline at end of file'
156 LINE_TERM = b'\n'
154 157
155 158
156 def unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0,
157 ignore_case=0, ignore_space_changes=0, lineterm='\n'):
159 def unified_diff(from_lines, to_lines, context=None, ignore_blank_lines: bool = False,
160 ignore_case: bool = False, ignore_space_changes: bool = False, lineterm=LINE_TERM) -> bytes:
158 161 """
159 162 Generator producing lines corresponding to a textual diff.
160 163
161 164 See `get_filtered_hunks` for the parameter descriptions.
162 165 """
163 166 # TODO: johbo: Check if this can be nicely integrated into the matching
167
164 168 if ignore_space_changes:
165 fromlines = [l.strip() for l in fromlines]
166 tolines = [l.strip() for l in tolines]
169 from_lines = [l.strip() for l in from_lines]
170 to_lines = [l.strip() for l in to_lines]
167 171
168 for group in get_filtered_hunks(fromlines, tolines, context,
172 def _hunk_range(start, length) -> bytes:
173 if length != 1:
174 return b'%d,%d' % (start, length)
175 else:
176 return b'%d' % (start,)
177
178 for group in get_filtered_hunks(from_lines, to_lines, context,
169 179 ignore_blank_lines, ignore_case,
170 180 ignore_space_changes):
171 181 i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
@@ -173,37 +183,30 b' def unified_diff(fromlines, tolines, con'
173 183 i1, i2 = -1, -1 # support for Add changes
174 184 if j1 == 0 and j2 == 0:
175 185 j1, j2 = -1, -1 # support for Delete changes
176 yield '@@ -%s +%s @@%s' % (
186 yield b'@@ -%b +%b @@%b' % (
177 187 _hunk_range(i1 + 1, i2 - i1),
178 188 _hunk_range(j1 + 1, j2 - j1),
179 189 lineterm)
180 190 for tag, i1, i2, j1, j2 in group:
181 191 if tag == 'equal':
182 for line in fromlines[i1:i2]:
192 for line in from_lines[i1:i2]:
183 193 if not line.endswith(lineterm):
184 yield ' ' + line + lineterm
194 yield b' ' + line + lineterm
185 195 yield NO_NEWLINE_AT_END + lineterm
186 196 else:
187 yield ' ' + line
197 yield b' ' + line
188 198 else:
189 199 if tag in ('replace', 'delete'):
190 for line in fromlines[i1:i2]:
200 for line in from_lines[i1:i2]:
191 201 if not line.endswith(lineterm):
192 yield '-' + line + lineterm
202 yield b'-' + line + lineterm
193 203 yield NO_NEWLINE_AT_END + lineterm
194 204 else:
195 yield '-' + line
205 yield b'-' + line
196 206 if tag in ('replace', 'insert'):
197 for line in tolines[j1:j2]:
207 for line in to_lines[j1:j2]:
198 208 if not line.endswith(lineterm):
199 yield '+' + line + lineterm
209 yield b'+' + line + lineterm
200 210 yield NO_NEWLINE_AT_END + lineterm
201 211 else:
202 yield '+' + line
203
204
205 def _hunk_range(start, length):
206 if length != 1:
207 return '%d,%d' % (start, length)
208 else:
209 return '%d' % (start, )
212 yield b'+' + line
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -18,11 +18,10 b''
18 18 import os
19 19 import shutil
20 20 import tempfile
21
22 import configobj
21 import configparser
23 22
24 23
25 class ContextINI(object):
24 class ContextINI:
26 25 """
27 26 Allows to create a new test.ini file as a copy of existing one with edited
28 27 data. If existing file is not present, it creates a new one. Example usage::
@@ -53,17 +52,17 b' class ContextINI(object):'
53 52 with open(self.new_path, 'wb'):
54 53 pass
55 54
56 config = configobj.ConfigObj(
57 self.new_path, file_error=True, write_empty_values=True)
55 parser = configparser.ConfigParser()
56 parser.read(self.ini_file_path)
58 57
59 58 for data in self.ini_params:
60 section, ini_params = data.items()[0]
61 key, val = ini_params.items()[0]
62 if section not in config:
63 config[section] = {}
64 config[section][key] = val
65
66 config.write()
59 section, ini_params = list(data.items())[0]
60 key, val = list(ini_params.items())[0]
61 if section not in parser:
62 parser[section] = {}
63 parser[section][key] = val
64 with open(self.ini_file_path, 'w') as f:
65 parser.write(f)
67 66 return self.new_path
68 67
69 68 def __exit__(self, exc_type, exc_val, exc_tb):
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -21,8 +21,7 b' import pytest'
21 21 import dulwich.errors
22 22 from mock import Mock, patch
23 23
24 from vcsserver import git
25
24 from vcsserver.remote import git_remote
26 25
27 26 SAMPLE_REFS = {
28 27 'HEAD': 'fd627b9e0dd80b47be81af07c4a98518244ed2f7',
@@ -34,26 +33,26 b' SAMPLE_REFS = {'
34 33
35 34
36 35 @pytest.fixture
37 def git_remote():
36 def git_remote_fix():
38 37 """
39 38 A GitRemote instance with a mock factory.
40 39 """
41 40 factory = Mock()
42 remote = git.GitRemote(factory)
41 remote = git_remote.GitRemote(factory)
43 42 return remote
44 43
45 44
46 def test_discover_git_version(git_remote):
47 version = git_remote.discover_git_version()
45 def test_discover_git_version(git_remote_fix):
46 version = git_remote_fix.discover_git_version()
48 47 assert version
49 48
50 49
51 class TestGitFetch(object):
52 def setup(self):
50 class TestGitFetch:
51 def setup_method(self):
53 52 self.mock_repo = Mock()
54 53 factory = Mock()
55 54 factory.repo = Mock(return_value=self.mock_repo)
56 self.remote_git = git.GitRemote(factory)
55 self.remote_git = git_remote.GitRemote(factory)
57 56
58 57 def test_fetches_all_when_no_commit_ids_specified(self):
59 58 def side_effect(determine_wants, *args, **kwargs):
@@ -67,8 +66,8 b' class TestGitFetch(object):'
67 66
68 67 def test_fetches_specified_commits(self):
69 68 selected_refs = {
70 'refs/tags/v0.1.8': '74ebce002c088b8a5ecf40073db09375515ecd68',
71 'refs/tags/v0.1.3': '5a3a8fb005554692b16e21dee62bf02667d8dc3e',
69 'refs/tags/v0.1.8': b'74ebce002c088b8a5ecf40073db09375515ecd68',
70 'refs/tags/v0.1.3': b'5a3a8fb005554692b16e21dee62bf02667d8dc3e',
72 71 }
73 72
74 73 def side_effect(determine_wants, *args, **kwargs):
@@ -80,41 +79,41 b' class TestGitFetch(object):'
80 79 mock_fetch.side_effect = side_effect
81 80 self.remote_git.pull(
82 81 wire={}, url='/tmp/', apply_refs=False,
83 refs=selected_refs.keys())
82 refs=list(selected_refs.keys()))
84 83 determine_wants = self.mock_repo.object_store.determine_wants_all
85 84 assert determine_wants.call_count == 0
86 85
87 86 def test_get_remote_refs(self):
88 87 factory = Mock()
89 remote_git = git.GitRemote(factory)
90 url = 'http://example.com/test/test.git'
88 remote_git = git_remote.GitRemote(factory)
89 url = 'https://example.com/test/test.git'
91 90 sample_refs = {
92 91 'refs/tags/v0.1.8': '74ebce002c088b8a5ecf40073db09375515ecd68',
93 92 'refs/tags/v0.1.3': '5a3a8fb005554692b16e21dee62bf02667d8dc3e',
94 93 }
95 94
96 with patch('vcsserver.git.Repo', create=False) as mock_repo:
95 with patch('vcsserver.remote.git_remote.Repo', create=False) as mock_repo:
97 96 mock_repo().get_refs.return_value = sample_refs
98 97 remote_refs = remote_git.get_remote_refs(wire={}, url=url)
99 98 mock_repo().get_refs.assert_called_once_with()
100 99 assert remote_refs == sample_refs
101 100
102 101
103 class TestReraiseSafeExceptions(object):
102 class TestReraiseSafeExceptions:
104 103
105 104 def test_method_decorated_with_reraise_safe_exceptions(self):
106 105 factory = Mock()
107 git_remote = git.GitRemote(factory)
106 git_remote_instance = git_remote.GitRemote(factory)
108 107
109 108 def fake_function():
110 109 return None
111 110
112 decorator = git.reraise_safe_exceptions(fake_function)
111 decorator = git_remote.reraise_safe_exceptions(fake_function)
113 112
114 methods = inspect.getmembers(git_remote, predicate=inspect.ismethod)
113 methods = inspect.getmembers(git_remote_instance, predicate=inspect.ismethod)
115 114 for method_name, method in methods:
116 115 if not method_name.startswith('_') and method_name not in ['vcsserver_invalidate_cache']:
117 assert method.im_func.__code__ == decorator.__code__
116 assert method.__func__.__code__ == decorator.__code__
118 117
119 118 @pytest.mark.parametrize('side_effect, expected_type', [
120 119 (dulwich.errors.ChecksumMismatch('0000000', 'deadbeef'), 'lookup'),
@@ -125,7 +124,7 b' class TestReraiseSafeExceptions(object):'
125 124 (dulwich.errors.UnexpectedCommandError('test-cmd'), 'error'),
126 125 ])
127 126 def test_safe_exceptions_reraised(self, side_effect, expected_type):
128 @git.reraise_safe_exceptions
127 @git_remote.reraise_safe_exceptions
129 128 def fake_method():
130 129 raise side_effect
131 130
@@ -135,26 +134,29 b' class TestReraiseSafeExceptions(object):'
135 134 assert exc_info.value._vcs_kind == expected_type
136 135
137 136
138 class TestDulwichRepoWrapper(object):
137 class TestDulwichRepoWrapper:
139 138 def test_calls_close_on_delete(self):
140 139 isdir_patcher = patch('dulwich.repo.os.path.isdir', return_value=True)
141 with isdir_patcher:
142 repo = git.Repo('/tmp/abcde')
143 with patch.object(git.DulwichRepo, 'close') as close_mock:
144 del repo
145 close_mock.assert_called_once_with()
140 with patch.object(git_remote.Repo, 'close') as close_mock:
141 with isdir_patcher:
142 repo = git_remote.Repo('/tmp/abcde')
143 assert repo is not None
144 repo.__del__()
145 # can't use del repo as in python3 this isn't always calling .__del__()
146
147 close_mock.assert_called_once_with()
146 148
147 149
148 class TestGitFactory(object):
150 class TestGitFactory:
149 151 def test_create_repo_returns_dulwich_wrapper(self):
150 152
151 153 with patch('vcsserver.lib.rc_cache.region_meta.dogpile_cache_regions') as mock:
152 154 mock.side_effect = {'repo_objects': ''}
153 factory = git.GitFactory()
155 factory = git_remote.GitFactory()
154 156 wire = {
155 157 'path': '/tmp/abcde'
156 158 }
157 159 isdir_patcher = patch('dulwich.repo.os.path.isdir', return_value=True)
158 160 with isdir_patcher:
159 161 result = factory._create_repo(wire, True)
160 assert isinstance(result, git.Repo)
162 assert isinstance(result, git_remote.Repo)
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -21,21 +21,22 b' import traceback'
21 21
22 22 import pytest
23 23 from mercurial.error import LookupError
24 from mock import Mock, MagicMock, patch
24 from mock import Mock, patch
25 25
26 from vcsserver import exceptions, hg, hgcompat
26 from vcsserver import exceptions, hgcompat
27 from vcsserver.remote import hg_remote
27 28
28 29
29 class TestDiff(object):
30 class TestDiff:
30 31 def test_raising_safe_exception_when_lookup_failed(self):
31 32
32 33 factory = Mock()
33 hg_remote = hg.HgRemote(factory)
34 hg_remote_instance = hg_remote.HgRemote(factory)
34 35 with patch('mercurial.patch.diff') as diff_mock:
35 diff_mock.side_effect = LookupError(
36 'deadbeef', 'index', 'message')
36 diff_mock.side_effect = LookupError(b'deadbeef', b'index', b'message')
37
37 38 with pytest.raises(Exception) as exc_info:
38 hg_remote.diff(
39 hg_remote_instance.diff(
39 40 wire={}, commit_id_1='deadbeef', commit_id_2='deadbee1',
40 41 file_filter=None, opt_git=True, opt_ignorews=True,
41 42 context=3)
@@ -43,26 +44,28 b' class TestDiff(object):'
43 44 assert exc_info.value._vcs_kind == 'lookup'
44 45
45 46
46 class TestReraiseSafeExceptions(object):
47 class TestReraiseSafeExceptions:
48 original_traceback = None
49
47 50 def test_method_decorated_with_reraise_safe_exceptions(self):
48 51 factory = Mock()
49 hg_remote = hg.HgRemote(factory)
50 methods = inspect.getmembers(hg_remote, predicate=inspect.ismethod)
51 decorator = hg.reraise_safe_exceptions(None)
52 hg_remote_instance = hg_remote.HgRemote(factory)
53 methods = inspect.getmembers(hg_remote_instance, predicate=inspect.ismethod)
54 decorator = hg_remote.reraise_safe_exceptions(None)
52 55 for method_name, method in methods:
53 56 if not method_name.startswith('_') and method_name not in ['vcsserver_invalidate_cache']:
54 assert method.im_func.__code__ == decorator.__code__
57 assert method.__func__.__code__ == decorator.__code__
55 58
56 59 @pytest.mark.parametrize('side_effect, expected_type', [
57 (hgcompat.Abort(), 'abort'),
58 (hgcompat.InterventionRequired(), 'abort'),
60 (hgcompat.Abort(b'failed-abort'), 'abort'),
61 (hgcompat.InterventionRequired(b'intervention-required'), 'abort'),
59 62 (hgcompat.RepoLookupError(), 'lookup'),
60 (hgcompat.LookupError('deadbeef', 'index', 'message'), 'lookup'),
63 (hgcompat.LookupError(b'deadbeef', b'index', b'message'), 'lookup'),
61 64 (hgcompat.RepoError(), 'error'),
62 65 (hgcompat.RequirementError(), 'requirement'),
63 66 ])
64 67 def test_safe_exceptions_reraised(self, side_effect, expected_type):
65 @hg.reraise_safe_exceptions
68 @hg_remote.reraise_safe_exceptions
66 69 def fake_method():
67 70 raise side_effect
68 71
@@ -72,15 +75,16 b' class TestReraiseSafeExceptions(object):'
72 75 assert exc_info.value._vcs_kind == expected_type
73 76
74 77 def test_keeps_original_traceback(self):
75 @hg.reraise_safe_exceptions
78
79 @hg_remote.reraise_safe_exceptions
76 80 def fake_method():
77 81 try:
78 raise hgcompat.Abort()
82 raise hgcompat.Abort(b'test-abort')
79 83 except:
80 self.original_traceback = traceback.format_tb(
81 sys.exc_info()[2])
84 self.original_traceback = traceback.format_tb(sys.exc_info()[2])
82 85 raise
83 86
87 new_traceback = None
84 88 try:
85 89 fake_method()
86 90 except Exception:
@@ -89,8 +93,8 b' class TestReraiseSafeExceptions(object):'
89 93 new_traceback_tail = new_traceback[-len(self.original_traceback):]
90 94 assert new_traceback_tail == self.original_traceback
91 95
92 def test_maps_unknow_exceptions_to_unhandled(self):
93 @hg.reraise_safe_exceptions
96 def test_maps_unknown_exceptions_to_unhandled(self):
97 @hg_remote.reraise_safe_exceptions
94 98 def stub_method():
95 99 raise ValueError('stub')
96 100
@@ -99,7 +103,7 b' class TestReraiseSafeExceptions(object):'
99 103 assert exc_info.value._vcs_kind == 'unhandled'
100 104
101 105 def test_does_not_map_known_exceptions(self):
102 @hg.reraise_safe_exceptions
106 @hg_remote.reraise_safe_exceptions
103 107 def stub_method():
104 108 raise exceptions.LookupException()('stub')
105 109
@@ -1,5 +1,5 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 # Copyright (C) 2014-2020 RhodeCode GmbH
2 # Copyright (C) 2014-2023 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
@@ -21,14 +21,14 b' import pytest'
21 21 from vcsserver import hgcompat, hgpatches
22 22
23 23
24 LARGEFILES_CAPABILITY = 'largefiles=serve'
24 LARGEFILES_CAPABILITY = b'largefiles=serve'
25 25
26 26
27 27 def test_patch_largefiles_capabilities_applies_patch(
28 28 patched_capabilities):
29 29 lfproto = hgcompat.largefiles.proto
30 30 hgpatches.patch_largefiles_capabilities()
31 assert lfproto._capabilities.func_name == '_dynamic_capabilities'
31 assert lfproto._capabilities.__name__ == '_dynamic_capabilities'
32 32
33 33
34 34 def test_dynamic_capabilities_uses_original_function_if_not_enabled(
@@ -72,11 +72,6 b' def test_dynamic_capabilities_uses_large'
72 72 assert LARGEFILES_CAPABILITY in caps
73 73
74 74
75 def test_hgsubversion_import():
76 from hgsubversion import svnrepo
77 assert svnrepo
78
79
80 75 @pytest.fixture
81 76 def patched_capabilities(request):
82 77 """
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
This diff has been collapsed as it changes many lines, (1103 lines changed) Show them Hide them
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now