python3: fixes and code optimization for python3.11
super-admin
r1114:8a549c84 python3
@@ -1,178 +1,178 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import os
18 18 import sys
19 19 import traceback
20 20 import logging
21 21 import urllib.parse
22 22
23 23 from vcsserver.lib.rc_cache import region_meta
24 24
25 25 from vcsserver import exceptions
26 26 from vcsserver.exceptions import NoContentException
27 27 from vcsserver.hgcompat import archival
28 28 from vcsserver.str_utils import safe_bytes
29 29
30 30 log = logging.getLogger(__name__)
31 31
32 32
33 33 class RepoFactory(object):
34 34 """
35 35 Utility to create repository instances
36 36
37 37 It provides internal caching of the `repo` object based on
38 38 the :term:`call context`.
39 39 """
40 40 repo_type = None
41 41
42 42 def __init__(self):
43 43 self._cache_region = region_meta.dogpile_cache_regions['repo_object']
44 44
45 45 def _create_config(self, path, config):
46 46 config = {}
47 47 return config
48 48
49 49 def _create_repo(self, wire, create):
50 50 raise NotImplementedError()
51 51
52 52 def repo(self, wire, create=False):
53 53 raise NotImplementedError()
54 54
55 55
56 56 def obfuscate_qs(query_string):
57 57 if query_string is None:
58 58 return None
59 59
60 60 parsed = []
61 61 for k, v in urllib.parse.parse_qsl(query_string, keep_blank_values=True):
62 62 if k in ['auth_token', 'api_key']:
63 63 v = "*****"
64 64 parsed.append((k, v))
65 65
66 66 return '&'.join('{}{}'.format(
67 k, '={}'.format(v) if v else '') for k, v in parsed)
67 k, f'={v}' if v else '') for k, v in parsed)
68 68
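A minimal usage sketch of `obfuscate_qs` (the token value below is made up; the helper lives in `vcsserver.base` alongside the code in this hunk):

    from vcsserver.base import obfuscate_qs

    # sensitive keys are masked, blank values keep just the bare key
    assert obfuscate_qs('cmd=batch&auth_token=secret&flag=') == \
        'cmd=batch&auth_token=*****&flag'
    assert obfuscate_qs(None) is None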
69 69
70 70 def raise_from_original(new_type, org_exc: Exception):
71 71 """
72 72 Raise a new exception type with original args and traceback.
73 73 """
74 74
75 75 exc_type, exc_value, exc_traceback = sys.exc_info()
76 76 new_exc = new_type(*exc_value.args)
77 77
78 78 # store the original traceback into the new exc
79 79 new_exc._org_exc_tb = traceback.format_tb(exc_traceback)
80 80
81 81 try:
82 82 raise new_exc.with_traceback(exc_traceback)
83 83 finally:
84 84 del exc_traceback
85 85
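`raise_from_original` must run inside an active `except` block, since it reads `sys.exc_info()`; a hedged sketch of the intended call pattern (`do_vcs_call` is hypothetical):

    from vcsserver import exceptions
    from vcsserver.base import raise_from_original

    try:
        do_vcs_call()  # hypothetical operation that raises
    except KeyError as e:
        # re-raise as a wire-friendly type, keeping args and traceback
        raise_from_original(exceptions.LookupException(e), e)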
86 86
87 87 class ArchiveNode(object):
88 88 def __init__(self, path, mode, is_link, raw_bytes):
89 89 self.path = path
90 90 self.mode = mode
91 91 self.is_link = is_link
92 92 self.raw_bytes = raw_bytes
93 93
94 94
95 95 def archive_repo(walker, archive_dest_path, kind, mtime, archive_at_path,
96 96 archive_dir_name, commit_id, write_metadata=True, extra_metadata=None):
97 97 """
98 98 walker should be a file walker, for example:
99 99 def walker():
100 100 for file_info in files:
101 101 yield ArchiveNode(fn, mode, is_link, ctx[fn].data)
102 102 """
103 103 extra_metadata = extra_metadata or {}
104 104 archive_dest_path = safe_bytes(archive_dest_path)
105 105
106 106 if kind == "tgz":
107 107 archiver = archival.tarit(archive_dest_path, mtime, b"gz")
108 108 elif kind == "tbz2":
109 109 archiver = archival.tarit(archive_dest_path, mtime, b"bz2")
110 110 elif kind == 'zip':
111 111 archiver = archival.zipit(archive_dest_path, mtime)
112 112 else:
113 113 raise exceptions.ArchiveException()(
114 114 f'Remote does not support: "{kind}" archive type.')
115 115
116 116 for f in walker(commit_id, archive_at_path):
117 117 f_path = os.path.join(safe_bytes(archive_dir_name), safe_bytes(f.path).lstrip(b'/'))
118 118 try:
119 119 archiver.addfile(f_path, f.mode, f.is_link, f.raw_bytes())
120 120 except NoContentException:
121 121 # NOTE(marcink): this is a special case for SVN so we can create "empty"
122 122 # directories which aren't supported by archiver
123 123 archiver.addfile(os.path.join(f_path, b'.dir'), f.mode, f.is_link, b'')
124 124
125 125 if write_metadata:
126 126 metadata = dict([
127 127 ('commit_id', commit_id),
128 128 ('mtime', mtime),
129 129 ])
130 130 metadata.update(extra_metadata)
131 131
132 132 meta = [safe_bytes(f"{f_name}:{value}") for f_name, value in metadata.items()]
133 133 f_path = os.path.join(safe_bytes(archive_dir_name), b'.archival.txt')
134 134 archiver.addfile(f_path, 0o644, False, b'\n'.join(meta))
135 135
136 136 return archiver.done()
137 137
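A hedged sketch of driving `archive_repo` with a trivial in-memory walker; note `raw_bytes` must be a callable returning bytes, since the loop above invokes `f.raw_bytes()` (paths and ids below are made up):

    from vcsserver.base import ArchiveNode, archive_repo

    def walker(commit_id, archive_at_path):
        # a single regular (non-link) file with mode 0o644
        yield ArchiveNode('README.md', 0o644, False, lambda: b'hello\n')

    archive_repo(walker, '/tmp/repo.tgz', kind='tgz', mtime=0,
                 archive_at_path='/', archive_dir_name='repo-abc123',
                 commit_id='abc123')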
138 138
139 139 class BinaryEnvelope(object):
140 140 def __init__(self, value: bytes, bin_type=True):
141 141 self.value = value
142 142 self.bin_type = bin_type
143 143
144 144 def __len__(self):
145 145 return len(self.value)
146 146
147 147 def __getitem__(self, index):
148 148 return self.value[index]
149 149
150 150 def __iter__(self):
151 151 return iter(self.value)
152 152
153 153 def __str__(self):
154 154 return str(self.value)
155 155
156 156 def __repr__(self):
157 157 return repr(self.value)
158 158
159 159 def __eq__(self, other):
160 160 if isinstance(other, BinaryEnvelope):
161 161 return self.value == other.value
162 162 return False
163 163
164 164 def __ne__(self, other):
165 165 return not self.__eq__(other)
166 166
167 167 def __add__(self, other):
168 168 if isinstance(other, BinaryEnvelope):
169 169 return BinaryEnvelope(self.value + other.value)
170 170 raise TypeError(f"unsupported operand type(s) for +: 'BinaryEnvelope' and '{type(other)}'")
171 171
172 172 def __radd__(self, other):
173 173 if isinstance(other, BinaryEnvelope):
174 174 return BinaryEnvelope(other.value + self.value)
175 175 raise TypeError(f"unsupported operand type(s) for +: '{type(other)}' and 'BinaryEnvelope'")
176 176
177 177
178 178
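`BinaryEnvelope` mostly proxies its `bytes` payload, so it can be measured, sliced, and concatenated like one, while `__eq__` only matches other envelopes; a short illustration:

    from vcsserver.base import BinaryEnvelope

    a = BinaryEnvelope(b'abc')
    b = BinaryEnvelope(b'def')

    assert len(a) == 3 and a[0:2] == b'ab'   # delegates to the wrapped bytes
    assert (a + b).value == b'abcdef'        # __add__ returns a new envelope
    assert a != b'abc'                       # plain bytes never compare equal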
@@ -1,125 +1,125 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """
19 19 Special exception handling over the wire.
20 20
21 21 Since we cannot assume that our client is able to import our exception classes,
22 22 this module provides a "wrapping" mechanism to raise plain exceptions
23 23 which contain an extra attribute `_vcs_kind` to allow a client to distinguish
24 24 different error conditions.
25 25 """
26 26
27 27 from pyramid.httpexceptions import HTTPLocked, HTTPForbidden
28 28
29 29
30 30 def _make_exception(kind, org_exc, *args):
31 31 """
32 32 Prepares a base `Exception` instance to be sent over the wire.
33 33
34 34 To give our caller a hint what this is about, it will attach an attribute
35 35 `_vcs_kind` to the exception.
36 36 """
37 37 exc = Exception(*args)
38 38 exc._vcs_kind = kind
39 39 exc._org_exc = org_exc
40 40 exc._org_exc_tb = getattr(org_exc, '_org_exc_tb', '')
41 41 return exc
42 42
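All the factories below share one shape: calling e.g. `ArchiveException(org_exc)` returns a wrapper, and calling the wrapper with message args builds the plain `Exception`. A hedged sketch of both sides of the wire:

    from vcsserver import exceptions

    # server side: two calls - factory first, then the actual args
    exc = exceptions.ArchiveException()('unsupported archive type')
    assert exc._vcs_kind == 'archive'

    # client side: only the marker attribute is needed, no imports
    try:
        raise exc
    except Exception as e:
        if getattr(e, '_vcs_kind', None) == 'archive':
            pass  # handle the archive error here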
43 43
44 44 def AbortException(org_exc=None):
45 45 def _make_exception_wrapper(*args):
46 46 return _make_exception('abort', org_exc, *args)
47 47 return _make_exception_wrapper
48 48
49 49
50 50 def ArchiveException(org_exc=None):
51 51 def _make_exception_wrapper(*args):
52 52 return _make_exception('archive', org_exc, *args)
53 53 return _make_exception_wrapper
54 54
55 55
56 56 def LookupException(org_exc=None):
57 57 def _make_exception_wrapper(*args):
58 58 return _make_exception('lookup', org_exc, *args)
59 59 return _make_exception_wrapper
60 60
61 61
62 62 def VcsException(org_exc=None):
63 63 def _make_exception_wrapper(*args):
64 64 return _make_exception('error', org_exc, *args)
65 65 return _make_exception_wrapper
66 66
67 67
68 68 def RepositoryLockedException(org_exc=None):
69 69 def _make_exception_wrapper(*args):
70 70 return _make_exception('repo_locked', org_exc, *args)
71 71 return _make_exception_wrapper
72 72
73 73
74 74 def RepositoryBranchProtectedException(org_exc=None):
75 75 def _make_exception_wrapper(*args):
76 76 return _make_exception('repo_branch_protected', org_exc, *args)
77 77 return _make_exception_wrapper
78 78
79 79
80 80 def RequirementException(org_exc=None):
81 81 def _make_exception_wrapper(*args):
82 82 return _make_exception('requirement', org_exc, *args)
83 83 return _make_exception_wrapper
84 84
85 85
86 86 def UnhandledException(org_exc=None):
87 87 def _make_exception_wrapper(*args):
88 88 return _make_exception('unhandled', org_exc, *args)
89 89 return _make_exception_wrapper
90 90
91 91
92 92 def URLError(org_exc=None):
93 93 def _make_exception_wrapper(*args):
94 94 return _make_exception('url_error', org_exc, *args)
95 95 return _make_exception_wrapper
96 96
97 97
98 98 def SubrepoMergeException(org_exc=None):
99 99 def _make_exception_wrapper(*args):
100 100 return _make_exception('subrepo_merge_error', org_exc, *args)
101 101 return _make_exception_wrapper
102 102
103 103
104 104 class HTTPRepoLocked(HTTPLocked):
105 105 """
106 106 Subclass of HTTPLocked response that allows setting the title and status
107 107 code via constructor arguments.
108 108 """
109 109 def __init__(self, title, status_code=None, **kwargs):
110 110 self.code = status_code or HTTPLocked.code
111 111 self.title = title
112 super(HTTPRepoLocked, self).__init__(**kwargs)
112 super().__init__(**kwargs)
113 113
114 114
115 115 class HTTPRepoBranchProtected(HTTPForbidden):
116 116 def __init__(self, *args, **kwargs):
117 117 super().__init__(*args, **kwargs)
118 118
119 119
120 120 class RefNotFoundException(KeyError):
121 121 pass
122 122
123 123
124 124 class NoContentException(ValueError):
125 125 pass
@@ -1,779 +1,777 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 2 # Copyright (C) 2014-2020 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
8 6 # the Free Software Foundation; either version 3 of the License, or
9 7 # (at your option) any later version.
10 8 #
11 9 # This program is distributed in the hope that it will be useful,
12 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 12 # GNU General Public License for more details.
15 13 #
16 14 # You should have received a copy of the GNU General Public License
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20 18 import io
21 19 import os
22 20 import sys
23 21 import logging
24 22 import collections
25 23 import importlib
26 24 import base64
27 25 import msgpack
28 26 import dataclasses
29 27 import pygit2
30 28
31 29 import http.client
32 30
33 31
34 32 import mercurial.scmutil
35 33 import mercurial.node
36 34
37 35 from vcsserver.lib.rc_json import json
38 36 from vcsserver import exceptions, subprocessio, settings
39 37 from vcsserver.str_utils import ascii_str, safe_str
40 38 from vcsserver.remote.git import Repository
41 39
42 40 log = logging.getLogger(__name__)
43 41
44 42
45 43 class HooksHttpClient(object):
46 44 proto = 'msgpack.v1'
47 45 connection = None
48 46
49 47 def __init__(self, hooks_uri):
50 48 self.hooks_uri = hooks_uri
51 49
52 50 def __repr__(self):
53 51 return f'{self.__class__}(hook_uri={self.hooks_uri}, proto={self.proto})'
54 52
55 53 def __call__(self, method, extras):
56 54 connection = http.client.HTTPConnection(self.hooks_uri)
57 55 # binary msgpack body
58 56 headers, body = self._serialize(method, extras)
59 57 log.debug('Doing a new hooks call using HTTPConnection to %s', self.hooks_uri)
60 58
61 59 try:
62 60 try:
63 61 connection.request('POST', '/', body, headers)
64 62 except Exception as error:
65 63 log.error('Hooks calling Connection failed on %s, org error: %s', connection.__dict__, error)
66 64 raise
67 65
68 66 response = connection.getresponse()
69 67 try:
70 68 return msgpack.load(response)
71 69 except Exception:
72 70 response_data = response.read()
73 71 log.exception('Failed to decode hook response msgpack data. '
74 72 'response_code:%s, raw_data:%s',
75 73 response.status, response_data)
76 74 raise
77 75 finally:
78 76 connection.close()
79 77
80 78 @classmethod
81 79 def _serialize(cls, hook_name, extras):
82 80 data = {
83 81 'method': hook_name,
84 82 'extras': extras
85 83 }
86 84 headers = {
87 85 "rc-hooks-protocol": cls.proto,
88 86 "Connection": "keep-alive"
89 87 }
90 88 return headers, msgpack.packb(data)
91 89
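`_serialize` pins the wire format: a msgpack map with `method` and `extras`, announced via the `rc-hooks-protocol` header. A round-trip sketch (assuming this hunk is `vcsserver/hooks.py` and msgpack >= 1.0 defaults, where unpacking yields `str` keys):

    import msgpack
    from vcsserver.hooks import HooksHttpClient

    headers, body = HooksHttpClient._serialize('pre_push', {'repository': 'repo1'})
    assert headers['rc-hooks-protocol'] == 'msgpack.v1'

    # the hooks daemon simply unpacks the body again
    assert msgpack.unpackb(body) == {
        'method': 'pre_push', 'extras': {'repository': 'repo1'}}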
92 90
93 91 class HooksDummyClient(object):
94 92 def __init__(self, hooks_module):
95 93 self._hooks_module = importlib.import_module(hooks_module)
96 94
97 95 def __call__(self, hook_name, extras):
98 96 with self._hooks_module.Hooks() as hooks:
99 97 return getattr(hooks, hook_name)(extras)
100 98
101 99
102 100 class HooksShadowRepoClient(object):
103 101
104 102 def __call__(self, hook_name, extras):
105 103 return {'output': '', 'status': 0}
106 104
107 105
108 106 class RemoteMessageWriter(object):
109 107 """Writer base class."""
110 108 def write(self, message):
111 109 raise NotImplementedError()
112 110
113 111
114 112 class HgMessageWriter(RemoteMessageWriter):
115 113 """Writer that knows how to send messages to mercurial clients."""
116 114
117 115 def __init__(self, ui):
118 116 self.ui = ui
119 117
120 118 def write(self, message: str):
121 119 # TODO: Check why the quiet flag is set by default.
122 120 old = self.ui.quiet
123 121 self.ui.quiet = False
124 122 self.ui.status(message.encode('utf-8'))
125 123 self.ui.quiet = old
126 124
127 125
128 126 class GitMessageWriter(RemoteMessageWriter):
129 127 """Writer that knows how to send messages to git clients."""
130 128
131 129 def __init__(self, stdout=None):
132 130 self.stdout = stdout or sys.stdout
133 131
134 132 def write(self, message: str):
135 133 self.stdout.write(message)
136 134
137 135
138 136 class SvnMessageWriter(RemoteMessageWriter):
139 137 """Writer that knows how to send messages to svn clients."""
140 138
141 139 def __init__(self, stderr=None):
142 140 # SVN needs data sent to stderr for back-to-client messaging
143 141 self.stderr = stderr or sys.stderr
144 142
145 143 def write(self, message):
146 144 self.stderr.write(message.encode('utf-8'))
147 145
148 146
149 147 def _handle_exception(result):
150 148 exception_class = result.get('exception')
151 149 exception_traceback = result.get('exception_traceback')
152 150
153 151 if exception_traceback:
154 152 log.error('Got traceback from remote call:%s', exception_traceback)
155 153
156 154 if exception_class == 'HTTPLockedRC':
157 155 raise exceptions.RepositoryLockedException()(*result['exception_args'])
158 156 elif exception_class == 'HTTPBranchProtected':
159 157 raise exceptions.RepositoryBranchProtectedException()(*result['exception_args'])
160 158 elif exception_class == 'RepositoryError':
161 159 raise exceptions.VcsException()(*result['exception_args'])
162 160 elif exception_class:
163 161 raise Exception(
164 162 f"""Got remote exception "{exception_class}" with args "{result['exception_args']}" """
165 163 )
166 164
167 165
168 166 def _get_hooks_client(extras):
169 167 hooks_uri = extras.get('hooks_uri')
170 168 is_shadow_repo = extras.get('is_shadow_repo')
171 169 if hooks_uri:
172 170 return HooksHttpClient(extras['hooks_uri'])
173 171 elif is_shadow_repo:
174 172 return HooksShadowRepoClient()
175 173 else:
176 174 return HooksDummyClient(extras['hooks_module'])
177 175
178 176
179 177 def _call_hook(hook_name, extras, writer):
180 178 hooks_client = _get_hooks_client(extras)
181 179 log.debug('Hooks, using client:%s', hooks_client)
182 180 result = hooks_client(hook_name, extras)
183 181 log.debug('Hooks got result: %s', result)
184 182 _handle_exception(result)
185 183 writer.write(result['output'])
186 184
187 185 return result['status']
188 186
189 187
190 188 def _extras_from_ui(ui):
191 189 hook_data = ui.config(b'rhodecode', b'RC_SCM_DATA')
192 190 if not hook_data:
193 191 # maybe it's inside environ ?
194 192 env_hook_data = os.environ.get('RC_SCM_DATA')
195 193 if env_hook_data:
196 194 hook_data = env_hook_data
197 195
198 196 extras = {}
199 197 if hook_data:
200 198 extras = json.loads(hook_data)
201 199 return extras
202 200
203 201
204 202 def _rev_range_hash(repo, node, check_heads=False):
205 203 from vcsserver.hgcompat import get_ctx
206 204
207 205 commits = []
208 206 revs = []
209 207 start = get_ctx(repo, node).rev()
210 208 end = len(repo)
211 209 for rev in range(start, end):
212 210 revs.append(rev)
213 211 ctx = get_ctx(repo, rev)
214 212 commit_id = ascii_str(mercurial.node.hex(ctx.node()))
215 213 branch = safe_str(ctx.branch())
216 214 commits.append((commit_id, branch))
217 215
218 216 parent_heads = []
219 217 if check_heads:
220 218 parent_heads = _check_heads(repo, start, end, revs)
221 219 return commits, parent_heads
222 220
223 221
224 222 def _check_heads(repo, start, end, commits):
225 223 from vcsserver.hgcompat import get_ctx
226 224 changelog = repo.changelog
227 225 parents = set()
228 226
229 227 for new_rev in commits:
230 228 for p in changelog.parentrevs(new_rev):
231 229 if p == mercurial.node.nullrev:
232 230 continue
233 231 if p < start:
234 232 parents.add(p)
235 233
236 234 for p in parents:
237 235 branch = get_ctx(repo, p).branch()
238 236 # The heads descending from that parent, on the same branch
239 237 parent_heads = {p}
240 238 reachable = {p}
241 239 for x in range(p + 1, end):
242 240 if get_ctx(repo, x).branch() != branch:
243 241 continue
244 242 for pp in changelog.parentrevs(x):
245 243 if pp in reachable:
246 244 reachable.add(x)
247 245 parent_heads.discard(pp)
248 246 parent_heads.add(x)
249 247 # More than one head? Suggest merging
250 248 if len(parent_heads) > 1:
251 249 return list(parent_heads)
252 250
253 251 return []
254 252
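To make the head detection above concrete, a hedged walk-through on a tiny revision graph (revision numbers illustrative):

    # existing 'default' history:  0 -- 1 -- 2          (current head: 2)
    # incoming push adds:               1 -- 3 -- 4     (second head: 4)
    #
    # with start=3, end=5: rev 3 has parent 1 < start, so parents={1}.
    # scanning revs 2..4 on the same branch, both 2 and 4 survive in
    # parent_heads, so len(parent_heads) > 1 and [2, 4] is returned,
    # signalling that the push would create multiple heads (merge needed).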
255 253
256 254 def _get_git_env():
257 255 env = {}
258 256 for k, v in os.environ.items():
259 257 if k.startswith('GIT'):
260 258 env[k] = v
261 259
262 260 # serialized version
263 261 return [(k, v) for k, v in env.items()]
264 262
265 263
266 264 def _get_hg_env(old_rev, new_rev, txnid, repo_path):
267 265 env = {}
268 266 for k, v in os.environ.items():
269 267 if k.startswith('HG'):
270 268 env[k] = v
271 269
272 270 env['HG_NODE'] = old_rev
273 271 env['HG_NODE_LAST'] = new_rev
274 272 env['HG_TXNID'] = txnid
275 273 env['HG_PENDING'] = repo_path
276 274
277 275 return [(k, v) for k, v in env.items()]
278 276
279 277
280 278 def repo_size(ui, repo, **kwargs):
281 279 extras = _extras_from_ui(ui)
282 280 return _call_hook('repo_size', extras, HgMessageWriter(ui))
283 281
284 282
285 283 def pre_pull(ui, repo, **kwargs):
286 284 extras = _extras_from_ui(ui)
287 285 return _call_hook('pre_pull', extras, HgMessageWriter(ui))
288 286
289 287
290 288 def pre_pull_ssh(ui, repo, **kwargs):
291 289 extras = _extras_from_ui(ui)
292 290 if extras and extras.get('SSH'):
293 291 return pre_pull(ui, repo, **kwargs)
294 292 return 0
295 293
296 294
297 295 def post_pull(ui, repo, **kwargs):
298 296 extras = _extras_from_ui(ui)
299 297 return _call_hook('post_pull', extras, HgMessageWriter(ui))
300 298
301 299
302 300 def post_pull_ssh(ui, repo, **kwargs):
303 301 extras = _extras_from_ui(ui)
304 302 if extras and extras.get('SSH'):
305 303 return post_pull(ui, repo, **kwargs)
306 304 return 0
307 305
308 306
309 307 def pre_push(ui, repo, node=None, **kwargs):
310 308 """
311 309 Mercurial pre_push hook
312 310 """
313 311 extras = _extras_from_ui(ui)
314 312 detect_force_push = extras.get('detect_force_push')
315 313
316 314 rev_data = []
317 315 hook_type: str = safe_str(kwargs.get('hooktype'))
318 316
319 317 if node and hook_type == 'pretxnchangegroup':
320 318 branches = collections.defaultdict(list)
321 319 commits, _heads = _rev_range_hash(repo, node, check_heads=detect_force_push)
322 320 for commit_id, branch in commits:
323 321 branches[branch].append(commit_id)
324 322
325 323 for branch, commits in branches.items():
326 324 old_rev = ascii_str(kwargs.get('node_last')) or commits[0]
327 325 rev_data.append({
328 326 'total_commits': len(commits),
329 327 'old_rev': old_rev,
330 328 'new_rev': commits[-1],
331 329 'ref': '',
332 330 'type': 'branch',
333 331 'name': branch,
334 332 })
335 333
336 334 for push_ref in rev_data:
337 335 push_ref['multiple_heads'] = _heads
338 336
339 337 repo_path = os.path.join(
340 338 extras.get('repo_store', ''), extras.get('repository', ''))
341 339 push_ref['hg_env'] = _get_hg_env(
342 340 old_rev=push_ref['old_rev'],
343 341 new_rev=push_ref['new_rev'], txnid=ascii_str(kwargs.get('txnid')),
344 342 repo_path=repo_path)
345 343
346 344 extras['hook_type'] = hook_type or 'pre_push'
347 345 extras['commit_ids'] = rev_data
348 346
349 347 return _call_hook('pre_push', extras, HgMessageWriter(ui))
350 348
351 349
352 350 def pre_push_ssh(ui, repo, node=None, **kwargs):
353 351 extras = _extras_from_ui(ui)
354 352 if extras.get('SSH'):
355 353 return pre_push(ui, repo, node, **kwargs)
356 354
357 355 return 0
358 356
359 357
360 358 def pre_push_ssh_auth(ui, repo, node=None, **kwargs):
361 359 """
362 360 Mercurial pre_push hook for SSH
363 361 """
364 362 extras = _extras_from_ui(ui)
365 363 if extras.get('SSH'):
366 364 permission = extras['SSH_PERMISSIONS']
367 365
368 366 if 'repository.write' == permission or 'repository.admin' == permission:
369 367 return 0
370 368
371 369 # non-zero ret code
372 370 return 1
373 371
374 372 return 0
375 373
376 374
377 375 def post_push(ui, repo, node, **kwargs):
378 376 """
379 377 Mercurial post_push hook
380 378 """
381 379 extras = _extras_from_ui(ui)
382 380
383 381 commit_ids = []
384 382 branches = []
385 383 bookmarks = []
386 384 tags = []
387 385 hook_type: str = safe_str(kwargs.get('hooktype'))
388 386
389 387 commits, _heads = _rev_range_hash(repo, node)
390 388 for commit_id, branch in commits:
391 389 commit_ids.append(commit_id)
392 390 if branch not in branches:
393 391 branches.append(branch)
394 392
395 393 if hasattr(ui, '_rc_pushkey_bookmarks'):
396 394 bookmarks = ui._rc_pushkey_bookmarks
397 395
398 396 extras['hook_type'] = hook_type or 'post_push'
399 397 extras['commit_ids'] = commit_ids
400 398
401 399 extras['new_refs'] = {
402 400 'branches': branches,
403 401 'bookmarks': bookmarks,
404 402 'tags': tags
405 403 }
406 404
407 405 return _call_hook('post_push', extras, HgMessageWriter(ui))
408 406
409 407
410 408 def post_push_ssh(ui, repo, node, **kwargs):
411 409 """
412 410 Mercurial post_push hook for SSH
413 411 """
414 412 if _extras_from_ui(ui).get('SSH'):
415 413 return post_push(ui, repo, node, **kwargs)
416 414 return 0
417 415
418 416
419 417 def key_push(ui, repo, **kwargs):
420 418 from vcsserver.hgcompat import get_ctx
421 419
422 420 if kwargs['new'] != b'0' and kwargs['namespace'] == b'bookmarks':
423 421 # store new bookmarks in our UI object propagated later to post_push
424 422 ui._rc_pushkey_bookmarks = get_ctx(repo, kwargs['key']).bookmarks()
425 423 return
426 424
427 425
428 426 # backward compat
429 427 log_pull_action = post_pull
430 428
431 429 # backward compat
432 430 log_push_action = post_push
433 431
434 432
435 433 def handle_git_pre_receive(unused_repo_path, unused_revs, unused_env):
436 434 """
437 435 Old hook name: keep here for backward compatibility.
438 436
439 437 This is only required when the installed git hooks are not upgraded.
440 438 """
441 439 pass
442 440
443 441
444 442 def handle_git_post_receive(unused_repo_path, unused_revs, unused_env):
445 443 """
446 444 Old hook name: keep here for backward compatibility.
447 445
448 446 This is only required when the installed git hooks are not upgraded.
449 447 """
450 448 pass
451 449
452 450
453 451 @dataclasses.dataclass
454 452 class HookResponse:
455 453 status: int
456 454 output: str
457 455
458 456
459 457 def git_pre_pull(extras) -> HookResponse:
460 458 """
461 459 Pre pull hook.
462 460
463 461 :param extras: dictionary containing the keys defined in simplevcs
464 462 :type extras: dict
465 463
466 464 :return: HookResponse with the hook's status code (0 for success).
467 465 :rtype: HookResponse
468 466 """
469 467
470 468 if 'pull' not in extras['hooks']:
471 469 return HookResponse(0, '')
472 470
473 471 stdout = io.StringIO()
474 472 try:
475 473 status_code = _call_hook('pre_pull', extras, GitMessageWriter(stdout))
476 474
477 475 except Exception as error:
478 476 log.exception('Failed to call pre_pull hook')
479 477 status_code = 128
480 478 stdout.write(f'ERROR: {error}\n')
481 479
482 480 return HookResponse(status_code, stdout.getvalue())
483 481
484 482
485 483 def git_post_pull(extras) -> HookResponse:
486 484 """
487 485 Post pull hook.
488 486
489 487 :param extras: dictionary containing the keys defined in simplevcs
490 488 :type extras: dict
491 489
492 490 :return: HookResponse with the hook's status code (0 for success).
493 491 :rtype: HookResponse
494 492 """
495 493 if 'pull' not in extras['hooks']:
496 494 return HookResponse(0, '')
497 495
498 496 stdout = io.StringIO()
499 497 try:
500 498 status = _call_hook('post_pull', extras, GitMessageWriter(stdout))
501 499 except Exception as error:
502 500 status = 128
503 501 stdout.write(f'ERROR: {error}\n')
504 502
505 503 return HookResponse(status, stdout.getvalue())
506 504
507 505
508 506 def _parse_git_ref_lines(revision_lines):
509 507 rev_data = []
510 508 for revision_line in revision_lines or []:
511 509 old_rev, new_rev, ref = revision_line.strip().split(' ')
512 510 ref_data = ref.split('/', 2)
513 511 if ref_data[1] in ('tags', 'heads'):
514 512 rev_data.append({
515 513 # NOTE(marcink):
516 514 # we're unable to tell total_commits for git at this point
517 515 # but we set the variable for consistency with the hg hooks
518 516 'total_commits': -1,
519 517 'old_rev': old_rev,
520 518 'new_rev': new_rev,
521 519 'ref': ref,
522 520 'type': ref_data[1],
523 521 'name': ref_data[2],
524 522 })
525 523 return rev_data
526 524
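`_parse_git_ref_lines` consumes the `<old-sha> <new-sha> <ref>` lines git feeds receive hooks on stdin; a short sketch (SHAs shortened for readability, import path assumed to be this module):

    from vcsserver.hooks import _parse_git_ref_lines

    lines = ['aaa111 bbb222 refs/heads/main',
             'ccc333 ddd444 refs/tags/v1.0']

    rev_data = _parse_git_ref_lines(lines)
    assert rev_data[0]['type'] == 'heads' and rev_data[0]['name'] == 'main'
    assert rev_data[1]['type'] == 'tags' and rev_data[1]['name'] == 'v1.0'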
527 525
528 526 def git_pre_receive(unused_repo_path, revision_lines, env) -> int:
529 527 """
530 528 Pre push hook.
531 529
532 530 :return: status code of the hook. 0 for success.
533 531 """
534 532 extras = json.loads(env['RC_SCM_DATA'])
535 533 rev_data = _parse_git_ref_lines(revision_lines)
536 534 if 'push' not in extras['hooks']:
537 535 return 0
538 536 empty_commit_id = '0' * 40
539 537
540 538 detect_force_push = extras.get('detect_force_push')
541 539
542 540 for push_ref in rev_data:
543 541 # store our git-env which holds the temp store
544 542 push_ref['git_env'] = _get_git_env()
545 543 push_ref['pruned_sha'] = ''
546 544 if not detect_force_push:
547 545 # don't check for forced-push when we don't need to
548 546 continue
549 547
550 548 type_ = push_ref['type']
551 549 new_branch = push_ref['old_rev'] == empty_commit_id
552 550 delete_branch = push_ref['new_rev'] == empty_commit_id
553 551 if type_ == 'heads' and not (new_branch or delete_branch):
554 552 old_rev = push_ref['old_rev']
555 553 new_rev = push_ref['new_rev']
556 cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, '^{}'.format(new_rev)]
554 cmd = [settings.GIT_EXECUTABLE, 'rev-list', old_rev, f'^{new_rev}']
557 555 stdout, stderr = subprocessio.run_command(
558 556 cmd, env=os.environ.copy())
559 557 # non-empty output means some objects became non-reachable, i.e. a forced push was used
560 558 if stdout:
561 559 push_ref['pruned_sha'] = stdout.splitlines()
562 560
563 561 extras['hook_type'] = 'pre_receive'
564 562 extras['commit_ids'] = rev_data
565 563
566 564 stdout = sys.stdout
567 565 status_code = _call_hook('pre_push', extras, GitMessageWriter(stdout))
568 566
569 567 return status_code
570 568
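The force-push check above leans on a git invariant: `git rev-list <old> ^<new>` lists commits reachable from the old tip but not from the new one, and is non-empty exactly when history was rewritten. A standalone restatement of the same check (a hedged sketch, not the hook's own code path):

    import subprocess

    def was_force_push(old_rev: str, new_rev: str, repo_dir: str) -> bool:
        # commits reachable from old_rev but not new_rev were discarded
        out = subprocess.check_output(
            ['git', 'rev-list', old_rev, f'^{new_rev}'], cwd=repo_dir)
        return bool(out.strip())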
571 569
572 570 def git_post_receive(unused_repo_path, revision_lines, env) -> int:
573 571 """
574 572 Post push hook.
575 573
576 574 :return: status code of the hook. 0 for success.
577 575 """
578 576 extras = json.loads(env['RC_SCM_DATA'])
579 577 if 'push' not in extras['hooks']:
580 578 return 0
581 579
582 580 rev_data = _parse_git_ref_lines(revision_lines)
583 581
584 582 git_revs = []
585 583
586 584 # N.B.(skreft): it is ok to just call git, as git before calling a
587 585 # subcommand sets the PATH environment variable so that it points to the
588 586 # correct version of the git executable.
589 587 empty_commit_id = '0' * 40
590 588 branches = []
591 589 tags = []
592 590 for push_ref in rev_data:
593 591 type_ = push_ref['type']
594 592
595 593 if type_ == 'heads':
596 594 # starting new branch case
597 595 if push_ref['old_rev'] == empty_commit_id:
598 596 push_ref_name = push_ref['name']
599 597
600 598 if push_ref_name not in branches:
601 599 branches.append(push_ref_name)
602 600
603 601 need_head_set = ''
604 602 with Repository(os.getcwd()) as repo:
605 603 try:
606 604 repo.head
607 605 except pygit2.GitError:
608 606 need_head_set = f'refs/heads/{push_ref_name}'
609 607
610 608 if need_head_set:
611 609 repo.set_head(need_head_set)
612 610 print(f"Setting default branch to {push_ref_name}")
613 611
614 612 cmd = [settings.GIT_EXECUTABLE, 'for-each-ref', '--format=%(refname)', 'refs/heads/*']
615 613 stdout, stderr = subprocessio.run_command(
616 614 cmd, env=os.environ.copy())
617 615 heads = safe_str(stdout)
618 616 heads = heads.replace(push_ref['ref'], '')
619 617 heads = ' '.join(head for head
620 618 in heads.splitlines() if head) or '.'
621 619 cmd = [settings.GIT_EXECUTABLE, 'log', '--reverse',
622 620 '--pretty=format:%H', '--', push_ref['new_rev'],
623 621 '--not', heads]
624 622 stdout, stderr = subprocessio.run_command(
625 623 cmd, env=os.environ.copy())
626 624 git_revs.extend(list(map(ascii_str, stdout.splitlines())))
627 625
628 626 # delete branch case
629 627 elif push_ref['new_rev'] == empty_commit_id:
630 628 git_revs.append('delete_branch=>%s' % push_ref['name'])
631 629 else:
632 630 if push_ref['name'] not in branches:
633 631 branches.append(push_ref['name'])
634 632
635 633 cmd = [settings.GIT_EXECUTABLE, 'log',
636 634 '{old_rev}..{new_rev}'.format(**push_ref),
637 635 '--reverse', '--pretty=format:%H']
638 636 stdout, stderr = subprocessio.run_command(
639 637 cmd, env=os.environ.copy())
640 638 # we get bytes from stdout, we need str to be consistent
641 639 log_revs = list(map(ascii_str, stdout.splitlines()))
642 640 git_revs.extend(log_revs)
643 641
644 642 # Pure pygit2 impl. but still 2-3x slower :/
645 643 # results = []
646 644 #
647 645 # with Repository(os.getcwd()) as repo:
648 646 # repo_new_rev = repo[push_ref['new_rev']]
649 647 # repo_old_rev = repo[push_ref['old_rev']]
650 648 # walker = repo.walk(repo_new_rev.id, pygit2.GIT_SORT_TOPOLOGICAL)
651 649 #
652 650 # for commit in walker:
653 651 # if commit.id == repo_old_rev.id:
654 652 # break
655 653 # results.append(commit.id.hex)
656 654 # # reverse the order, can't use GIT_SORT_REVERSE
657 655 # log_revs = results[::-1]
658 656
659 657 elif type_ == 'tags':
660 658 if push_ref['name'] not in tags:
661 659 tags.append(push_ref['name'])
662 660 git_revs.append('tag=>%s' % push_ref['name'])
663 661
664 662 extras['hook_type'] = 'post_receive'
665 663 extras['commit_ids'] = git_revs
666 664 extras['new_refs'] = {
667 665 'branches': branches,
668 666 'bookmarks': [],
669 667 'tags': tags,
670 668 }
671 669
672 670 stdout = sys.stdout
673 671
674 672 if 'repo_size' in extras['hooks']:
675 673 try:
676 674 _call_hook('repo_size', extras, GitMessageWriter(stdout))
677 675 except Exception:
678 676 pass
679 677
680 678 status_code = _call_hook('post_push', extras, GitMessageWriter(stdout))
681 679 return status_code
682 680
683 681
684 682 def _get_extras_from_txn_id(path, txn_id):
685 683 extras = {}
686 684 try:
687 685 cmd = [settings.SVNLOOK_EXECUTABLE, 'pget',
688 686 '-t', txn_id,
689 687 '--revprop', path, 'rc-scm-extras']
690 688 stdout, stderr = subprocessio.run_command(
691 689 cmd, env=os.environ.copy())
692 690 extras = json.loads(base64.urlsafe_b64decode(stdout))
693 691 except Exception:
694 692 log.exception('Failed to extract extras info from txn_id')
695 693
696 694 return extras
697 695
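The `rc-scm-extras` revprop read via `svnlook pget` is expected to hold URL-safe base64 of a JSON document; a hedged round-trip sketch of that encoding:

    import base64
    import json

    extras = {'repository': 'repo1', 'hooks': ['push']}

    encoded = base64.urlsafe_b64encode(json.dumps(extras).encode('utf-8'))
    assert json.loads(base64.urlsafe_b64decode(encoded)) == extras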
698 696
699 697 def _get_extras_from_commit_id(commit_id, path):
700 698 extras = {}
701 699 try:
702 700 cmd = [settings.SVNLOOK_EXECUTABLE, 'pget',
703 701 '-r', commit_id,
704 702 '--revprop', path, 'rc-scm-extras']
705 703 stdout, stderr = subprocessio.run_command(
706 704 cmd, env=os.environ.copy())
707 705 extras = json.loads(base64.urlsafe_b64decode(stdout))
708 706 except Exception:
709 707 log.exception('Failed to extract extras info from commit_id')
710 708
711 709 return extras
712 710
713 711
714 712 def svn_pre_commit(repo_path, commit_data, env):
715 713 path, txn_id = commit_data
716 714 branches = []
717 715 tags = []
718 716
719 717 if env.get('RC_SCM_DATA'):
720 718 extras = json.loads(env['RC_SCM_DATA'])
721 719 else:
722 720 # fallback method to read from TXN-ID stored data
723 721 extras = _get_extras_from_txn_id(path, txn_id)
724 722 if not extras:
725 723 return 0
726 724
727 725 extras['hook_type'] = 'pre_commit'
728 726 extras['commit_ids'] = [txn_id]
729 727 extras['txn_id'] = txn_id
730 728 extras['new_refs'] = {
731 729 'total_commits': 1,
732 730 'branches': branches,
733 731 'bookmarks': [],
734 732 'tags': tags,
735 733 }
736 734
737 735 return _call_hook('pre_push', extras, SvnMessageWriter())
738 736
739 737
740 738 def svn_post_commit(repo_path, commit_data, env):
741 739 """
742 740 commit_data is path, rev, txn_id
743 741 """
744 742 if len(commit_data) == 3:
745 743 path, commit_id, txn_id = commit_data
746 744 elif len(commit_data) == 2:
747 745 log.error('Failed to extract txn_id from commit_data using legacy method. '
748 746 'Some functionality might be limited')
749 747 path, commit_id = commit_data
750 748 txn_id = None
751 749
752 750 branches = []
753 751 tags = []
754 752
755 753 if env.get('RC_SCM_DATA'):
756 754 extras = json.loads(env['RC_SCM_DATA'])
757 755 else:
758 756 # fallback method to read from TXN-ID stored data
759 757 extras = _get_extras_from_commit_id(commit_id, path)
760 758 if not extras:
761 759 return 0
762 760
763 761 extras['hook_type'] = 'post_commit'
764 762 extras['commit_ids'] = [commit_id]
765 763 extras['txn_id'] = txn_id
766 764 extras['new_refs'] = {
767 765 'branches': branches,
768 766 'bookmarks': [],
769 767 'tags': tags,
770 768 'total_commits': 1,
771 769 }
772 770
773 771 if 'repo_size' in extras['hooks']:
774 772 try:
775 773 _call_hook('repo_size', extras, SvnMessageWriter())
776 774 except Exception:
777 775 pass
778 776
779 777 return _call_hook('post_push', extras, SvnMessageWriter())
@@ -1,773 +1,773 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import io
19 19 import os
20 20 import sys
21 21 import base64
22 22 import locale
23 23 import logging
24 24 import uuid
25 25 import time
26 26 import wsgiref.util
27 27 import traceback
28 28 import tempfile
29 29 import psutil
30 30
31 31 from itertools import chain
32 32
33 33 import msgpack
34 34 import configparser
35 35
36 36 from pyramid.config import Configurator
37 37 from pyramid.wsgi import wsgiapp
38 38 from pyramid.response import Response
39 39
40 40 from vcsserver.base import BinaryEnvelope
41 41 from vcsserver.lib.rc_json import json
42 42 from vcsserver.config.settings_maker import SettingsMaker
43 43 from vcsserver.str_utils import safe_int, safe_bytes, safe_str
44 44 from vcsserver.lib.statsd_client import StatsdClient
45 45
46 46 log = logging.getLogger(__name__)
47 47
48 48 # due to Mercurial/glibc2.27 problems we need to detect if locale settings
49 49 # are causing problems, "fix" them if they do, and fall back to LC_ALL=C
50 50
51 51 try:
52 52 locale.setlocale(locale.LC_ALL, '')
53 53 except locale.Error as e:
54 54 log.error(
55 55 'LOCALE ERROR: failed to set LC_ALL, fallback to LC_ALL=C, org error: %s', e)
56 56 os.environ['LC_ALL'] = 'C'
57 57
58 58
59 59 import vcsserver
60 60 from vcsserver import remote_wsgi, scm_app, settings, hgpatches
61 61 from vcsserver.git_lfs.app import GIT_LFS_CONTENT_TYPE, GIT_LFS_PROTO_PAT
62 62 from vcsserver.echo_stub import remote_wsgi as remote_wsgi_stub
63 63 from vcsserver.echo_stub.echo_app import EchoApp
64 64 from vcsserver.exceptions import HTTPRepoLocked, HTTPRepoBranchProtected
65 65 from vcsserver.lib.exc_tracking import store_exception
66 66 from vcsserver.server import VcsServer
67 67
68 68 strict_vcs = True
69 69
70 70 git_import_err = None
71 71 try:
72 72 from vcsserver.remote.git import GitFactory, GitRemote
73 73 except ImportError as e:
74 74 GitFactory = None
75 75 GitRemote = None
76 76 git_import_err = e
77 77 if strict_vcs:
78 78 raise
79 79
80 80
81 81 hg_import_err = None
82 82 try:
83 83 from vcsserver.remote.hg import MercurialFactory, HgRemote
84 84 except ImportError as e:
85 85 MercurialFactory = None
86 86 HgRemote = None
87 87 hg_import_err = e
88 88 if strict_vcs:
89 89 raise
90 90
91 91
92 92 svn_import_err = None
93 93 try:
94 94 from vcsserver.remote.svn import SubversionFactory, SvnRemote
95 95 except ImportError as e:
96 96 SubversionFactory = None
97 97 SvnRemote = None
98 98 svn_import_err = e
99 99 if strict_vcs:
100 100 raise
101 101
102 102
103 103 def _is_request_chunked(environ):
104 104 stream = environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked'
105 105 return stream
106 106
107 107
108 108 def log_max_fd():
109 109 try:
110 110 maxfd = psutil.Process().rlimit(psutil.RLIMIT_NOFILE)[1]
111 111 log.info('Max file descriptors value: %s', maxfd)
112 112 except Exception:
113 113 pass
114 114
115 115
116 116 class VCS(object):
117 117 def __init__(self, locale_conf=None, cache_config=None):
118 118 self.locale = locale_conf
119 119 self.cache_config = cache_config
120 120 self._configure_locale()
121 121
122 122 log_max_fd()
123 123
124 124 if GitFactory and GitRemote:
125 125 git_factory = GitFactory()
126 126 self._git_remote = GitRemote(git_factory)
127 127 else:
128 128 log.error("Git client import failed: %s", git_import_err)
129 129
130 130 if MercurialFactory and HgRemote:
131 131 hg_factory = MercurialFactory()
132 132 self._hg_remote = HgRemote(hg_factory)
133 133 else:
134 134 log.error("Mercurial client import failed: %s", hg_import_err)
135 135
136 136 if SubversionFactory and SvnRemote:
137 137 svn_factory = SubversionFactory()
138 138
139 139 # hg factory is used for svn url validation
140 140 hg_factory = MercurialFactory()
141 141 self._svn_remote = SvnRemote(svn_factory, hg_factory=hg_factory)
142 142 else:
143 143 log.error("Subversion client import failed: %s", svn_import_err)
144 144
145 145 self._vcsserver = VcsServer()
146 146
147 147 def _configure_locale(self):
148 148 if self.locale:
149 149 log.info('Setting locale `LC_ALL` to %s', self.locale)
150 150 else:
151 151 log.info('Configuring locale subsystem based on environment variables')
152 152 try:
153 153 # If self.locale is the empty string, then the locale
154 154 # module will use the environment variables. See the
155 155 # documentation of the package `locale`.
156 156 locale.setlocale(locale.LC_ALL, self.locale)
157 157
158 158 language_code, encoding = locale.getlocale()
159 159 log.info(
160 160 'Locale set to language code "%s" with encoding "%s".',
161 161 language_code, encoding)
162 162 except locale.Error:
163 163 log.exception('Cannot set locale, not configuring the locale system')
164 164
165 165
166 166 class WsgiProxy(object):
167 167 def __init__(self, wsgi):
168 168 self.wsgi = wsgi
169 169
170 170 def __call__(self, environ, start_response):
171 171 input_data = environ['wsgi.input'].read()
172 172 input_data = msgpack.unpackb(input_data)
173 173
174 174 error = None
175 175 try:
176 176 data, status, headers = self.wsgi.handle(
177 177 input_data['environment'], input_data['input_data'],
178 178 *input_data['args'], **input_data['kwargs'])
179 179 except Exception as e:
180 180 data, status, headers = [], None, None
181 181 error = {
182 182 'message': str(e),
183 183 '_vcs_kind': getattr(e, '_vcs_kind', None)
184 184 }
185 185
186 186 start_response(200, {})
187 187 return self._iterator(error, status, headers, data)
188 188
189 189 def _iterator(self, error, status, headers, data):
190 190 initial_data = [
191 191 error,
192 192 status,
193 193 headers,
194 194 ]
195 195
196 196 for d in chain(initial_data, data):
197 197 yield msgpack.packb(d)
198 198
199 199
200 200 def not_found(request):
201 201 return {'status': '404 NOT FOUND'}
202 202
203 203
204 204 class VCSViewPredicate(object):
205 205 def __init__(self, val, config):
206 206 self.remotes = val
207 207
208 208 def text(self):
209 return 'vcs view method = %s' % (list(self.remotes.keys()),)
209 return f'vcs view method = {list(self.remotes.keys())}'
210 210
211 211 phash = text
212 212
213 213 def __call__(self, context, request):
214 214 """
215 215 View predicate that returns true if the given backend is supported by
216 216 the defined remotes.
217 217 """
218 218 backend = request.matchdict.get('backend')
219 219 return backend in self.remotes
220 220
221 221
222 222 class HTTPApplication(object):
223 223 ALLOWED_EXCEPTIONS = ('KeyError', 'URLError')
224 224
225 225 remote_wsgi = remote_wsgi
226 226 _use_echo_app = False
227 227
228 228 def __init__(self, settings=None, global_config=None):
229 229
230 230 self.config = Configurator(settings=settings)
231 231 # Init our statsd at very start
232 232 self.config.registry.statsd = StatsdClient.statsd
233 233 self.config.registry.vcs_call_context = {}
234 234
235 235 self.global_config = global_config
236 236 self.config.include('vcsserver.lib.rc_cache')
237 237
238 238 settings_locale = settings.get('locale', '') or 'en_US.UTF-8'
239 239 vcs = VCS(locale_conf=settings_locale, cache_config=settings)
240 240 self._remotes = {
241 241 'hg': vcs._hg_remote,
242 242 'git': vcs._git_remote,
243 243 'svn': vcs._svn_remote,
244 244 'server': vcs._vcsserver,
245 245 }
246 246 if settings.get('dev.use_echo_app', 'false').lower() == 'true':
247 247 self._use_echo_app = True
248 248 log.warning("Using EchoApp for VCS operations.")
249 249 self.remote_wsgi = remote_wsgi_stub
250 250
251 251 self._configure_settings(global_config, settings)
252 252
253 253 self._configure()
254 254
255 255 def _configure_settings(self, global_config, app_settings):
256 256 """
257 257 Configure the settings module.
258 258 """
259 259 settings_merged = global_config.copy()
260 260 settings_merged.update(app_settings)
261 261
262 262 git_path = app_settings.get('git_path', None)
263 263 if git_path:
264 264 settings.GIT_EXECUTABLE = git_path
265 265 binary_dir = app_settings.get('core.binary_dir', None)
266 266 if binary_dir:
267 267 settings.BINARY_DIR = binary_dir
268 268
269 269 # Store the settings to make them available to other modules.
270 270 vcsserver.PYRAMID_SETTINGS = settings_merged
271 271 vcsserver.CONFIG = settings_merged
272 272
273 273 def _configure(self):
274 274 self.config.add_renderer(name='msgpack', factory=self._msgpack_renderer_factory)
275 275
276 276 self.config.add_route('service', '/_service')
277 277 self.config.add_route('status', '/status')
278 278 self.config.add_route('hg_proxy', '/proxy/hg')
279 279 self.config.add_route('git_proxy', '/proxy/git')
280 280
281 281 # rpc methods
282 282 self.config.add_route('vcs', '/{backend}')
283 283
284 284 # streaming rpc remote methods
285 285 self.config.add_route('vcs_stream', '/{backend}/stream')
286 286
287 287 # vcs operations clone/push as streaming
288 288 self.config.add_route('stream_git', '/stream/git/*repo_name')
289 289 self.config.add_route('stream_hg', '/stream/hg/*repo_name')
290 290
291 291 self.config.add_view(self.status_view, route_name='status', renderer='json')
292 292 self.config.add_view(self.service_view, route_name='service', renderer='msgpack')
293 293
294 294 self.config.add_view(self.hg_proxy(), route_name='hg_proxy')
295 295 self.config.add_view(self.git_proxy(), route_name='git_proxy')
296 296 self.config.add_view(self.vcs_view, route_name='vcs', renderer='msgpack',
297 297 vcs_view=self._remotes)
298 298 self.config.add_view(self.vcs_stream_view, route_name='vcs_stream',
299 299 vcs_view=self._remotes)
300 300
301 301 self.config.add_view(self.hg_stream(), route_name='stream_hg')
302 302 self.config.add_view(self.git_stream(), route_name='stream_git')
303 303
304 304 self.config.add_view_predicate('vcs_view', VCSViewPredicate)
305 305
306 306 self.config.add_notfound_view(not_found, renderer='json')
307 307
308 308 self.config.add_view(self.handle_vcs_exception, context=Exception)
309 309
310 310 self.config.add_tween(
311 311 'vcsserver.tweens.request_wrapper.RequestWrapperTween',
312 312 )
313 313 self.config.add_request_method(
314 314 'vcsserver.lib.request_counter.get_request_counter',
315 315 'request_count')
316 316
317 317 def wsgi_app(self):
318 318 return self.config.make_wsgi_app()
319 319
320 320 def _vcs_view_params(self, request):
321 321 remote = self._remotes[request.matchdict['backend']]
322 322 payload = msgpack.unpackb(request.body, use_list=True)
323 323
324 324 method = payload.get('method')
325 325 params = payload['params']
326 326 wire = params.get('wire')
327 327 args = params.get('args')
328 328 kwargs = params.get('kwargs')
329 329 context_uid = None
330 330
331 331 request.registry.vcs_call_context = {
332 332 'method': method,
333 333 'repo_name': payload.get('_repo_name')
334 334 }
335 335
336 336 if wire:
337 337 try:
338 338 wire['context'] = context_uid = uuid.UUID(wire['context'])
339 339 except KeyError:
340 340 pass
341 341 args.insert(0, wire)
342 342 repo_state_uid = wire.get('repo_state_uid') if wire else None
343 343
344 344 # NOTE(marcink): trading complexity for slight performance
345 345 if log.isEnabledFor(logging.DEBUG):
346 346 # also we SKIP printing out any of those methods' args since they may be excessive
347 347 just_args_methods = {
348 348 'commitctx': ('content', 'removed', 'updated')
349 349 }
350 350 if method in just_args_methods:
351 351 skip_args = just_args_methods[method]
352 352 call_args = ''
353 353 call_kwargs = {}
354 354 for k in kwargs:
355 355 if k in skip_args:
356 356 # replace our skip key with dummy
357 357 call_kwargs[k] = f'RemovedParam({k})'
358 358 else:
359 359 call_kwargs[k] = kwargs[k]
360 360 else:
361 361 call_args = args[1:]
362 362 call_kwargs = kwargs
363 363
364 364 log.debug('Method requested:`%s` with args:%s kwargs:%s context_uid: %s, repo_state_uid:%s',
365 365 method, call_args, call_kwargs, context_uid, repo_state_uid)
366 366
367 367 statsd = request.registry.statsd
368 368 if statsd:
369 369 statsd.incr(
370 370 'vcsserver_method_total', tags=[
371 "method:{}".format(method),
371 f"method:{method}",
372 372 ])
373 373 return payload, remote, method, args, kwargs
374 374
375 375 def vcs_view(self, request):
376 376
377 377 payload, remote, method, args, kwargs = self._vcs_view_params(request)
378 378 payload_id = payload.get('id')
379 379
380 380 try:
381 381 resp = getattr(remote, method)(*args, **kwargs)
382 382 except Exception as e:
383 383 exc_info = list(sys.exc_info())
384 384 exc_type, exc_value, exc_traceback = exc_info
385 385
386 386 org_exc = getattr(e, '_org_exc', None)
387 387 org_exc_name = None
388 388 org_exc_tb = ''
389 389 if org_exc:
390 390 org_exc_name = org_exc.__class__.__name__
391 391 org_exc_tb = getattr(e, '_org_exc_tb', '')
392 392 # replace our "faked" exception with our org
393 393 exc_info[0] = org_exc.__class__
394 394 exc_info[1] = org_exc
395 395
396 396 should_store_exc = True
397 397 if org_exc:
398 398 def get_exc_fqn(_exc_obj):
399 399 module_name = getattr(org_exc.__class__, '__module__', 'UNKNOWN')
400 400 return module_name + '.' + org_exc_name
401 401
402 402 exc_fqn = get_exc_fqn(org_exc)
403 403
404 404 if exc_fqn in ['mercurial.error.RepoLookupError',
405 405 'vcsserver.exceptions.RefNotFoundException']:
406 406 should_store_exc = False
407 407
408 408 if should_store_exc:
409 409 store_exception(id(exc_info), exc_info, request_path=request.path)
410 410
411 411 tb_info = ''.join(
412 412 traceback.format_exception(exc_type, exc_value, exc_traceback))
413 413
414 414 type_ = e.__class__.__name__
415 415 if type_ not in self.ALLOWED_EXCEPTIONS:
416 416 type_ = None
417 417
418 418 resp = {
419 419 'id': payload_id,
420 420 'error': {
421 421 'message': str(e),
422 422 'traceback': tb_info,
423 423 'org_exc': org_exc_name,
424 424 'org_exc_tb': org_exc_tb,
425 425 'type': type_
426 426 }
427 427 }
428 428
429 429 try:
430 430 resp['error']['_vcs_kind'] = getattr(e, '_vcs_kind', None)
431 431 except AttributeError:
432 432 pass
433 433 else:
434 434 resp = {
435 435 'id': payload_id,
436 436 'result': resp
437 437 }
438 438 log.debug('Serving data for method %s', method)
439 439 return resp
440 440
441 441 def vcs_stream_view(self, request):
442 442 payload, remote, method, args, kwargs = self._vcs_view_params(request)
443 443 # this method has a 'stream:' marker; we remove it here
444 444 method = method.split('stream:')[-1]
445 445 chunk_size = safe_int(payload.get('chunk_size')) or 4096
446 446
447 447 try:
448 448 resp = getattr(remote, method)(*args, **kwargs)
449 449 except Exception as e:
450 450 raise
451 451
452 452 def get_chunked_data(method_resp):
453 453 stream = io.BytesIO(method_resp)
454 454 while 1:
455 455 chunk = stream.read(chunk_size)
456 456 if not chunk:
457 457 break
458 458 yield chunk
459 459
460 460 response = Response(app_iter=get_chunked_data(resp))
461 461 response.content_type = 'application/octet-stream'
462 462
463 463 return response
464 464
465 465 def status_view(self, request):
466 466 import vcsserver
467 467 return {'status': 'OK', 'vcsserver_version': vcsserver.__version__,
468 468 'pid': os.getpid()}
469 469
470 470 def service_view(self, request):
471 471 import vcsserver
472 472
473 473 payload = msgpack.unpackb(request.body, use_list=True)
474 474 server_config, app_config = {}, {}
475 475
476 476 try:
477 477 path = self.global_config['__file__']
478 478 config = configparser.RawConfigParser()
479 479
480 480 config.read(path)
481 481
482 482 if config.has_section('server:main'):
483 483 server_config = dict(config.items('server:main'))
484 484 if config.has_section('app:main'):
485 485 app_config = dict(config.items('app:main'))
486 486
487 487 except Exception:
488 488 log.exception('Failed to read .ini file for display')
489 489
490 490 environ = list(os.environ.items())
491 491
492 492 resp = {
493 493 'id': payload.get('id'),
494 494 'result': dict(
495 495 version=vcsserver.__version__,
496 496 config=server_config,
497 497 app_config=app_config,
498 498 environ=environ,
499 499 payload=payload,
500 500 )
501 501 }
502 502 return resp
503 503
504 504 def _msgpack_renderer_factory(self, info):
505 505
506 506 def _render(value, system):
507 507 bin_type = False
508 508 res = value.get('result')
509 509 if res and isinstance(res, BinaryEnvelope):
510 510 log.debug('Result is wrapped in BinaryEnvelope type')
511 511 value['result'] = res.value
512 512 bin_type = res.bin_type
513 513
514 514 request = system.get('request')
515 515 if request is not None:
516 516 response = request.response
517 517 ct = response.content_type
518 518 if ct == response.default_content_type:
519 519 response.content_type = 'application/x-msgpack'
520 520 if bin_type:
521 521 response.content_type = 'application/x-msgpack-bin'
522 522
523 523 return msgpack.packb(value, use_bin_type=bin_type)
524 524 return _render
525 525
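In effect the renderer unwraps a `BinaryEnvelope` result and flips the response content type to `application/x-msgpack-bin` so the caller knows binary-typed packing was used; a hedged sketch of just the packing step (msgpack >= 1.0 defaults assumed):

    import msgpack
    from vcsserver.base import BinaryEnvelope

    value = {'id': 1, 'result': BinaryEnvelope(b'blob')}

    res = value['result']
    value['result'] = res.value                      # unwrap before packing
    packed = msgpack.packb(value, use_bin_type=res.bin_type)

    assert msgpack.unpackb(packed) == {'id': 1, 'result': b'blob'}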
526 526 def set_env_from_config(self, environ, config):
527 527 dict_conf = {}
528 528 try:
529 529 for elem in config:
530 530 if elem[0] == 'rhodecode':
531 531 dict_conf = json.loads(elem[2])
532 532 break
533 533 except Exception:
534 534 log.exception('Failed to fetch SCM CONFIG')
535 535 return
536 536
537 537 username = dict_conf.get('username')
538 538 if username:
539 539 environ['REMOTE_USER'] = username
540 540 # mercurial specific, some extension api rely on this
541 541 environ['HGUSER'] = username
542 542
543 543 ip = dict_conf.get('ip')
544 544 if ip:
545 545 environ['REMOTE_HOST'] = ip
546 546
547 547 if _is_request_chunked(environ):
548 548 # set the compatibility flag for webob
549 549 environ['wsgi.input_terminated'] = True
550 550
551 551 def hg_proxy(self):
552 552 @wsgiapp
553 553 def _hg_proxy(environ, start_response):
554 554 app = WsgiProxy(self.remote_wsgi.HgRemoteWsgi())
555 555 return app(environ, start_response)
556 556 return _hg_proxy
557 557
558 558 def git_proxy(self):
559 559 @wsgiapp
560 560 def _git_proxy(environ, start_response):
561 561 app = WsgiProxy(self.remote_wsgi.GitRemoteWsgi())
562 562 return app(environ, start_response)
563 563 return _git_proxy
564 564
565 565 def hg_stream(self):
566 566 if self._use_echo_app:
567 567 @wsgiapp
568 568 def _hg_stream(environ, start_response):
569 569 app = EchoApp('fake_path', 'fake_name', None)
570 570 return app(environ, start_response)
571 571 return _hg_stream
572 572 else:
573 573 @wsgiapp
574 574 def _hg_stream(environ, start_response):
575 575 log.debug('http-app: handling hg stream')
576 576
577 577 packed_cc = base64.b64decode(environ['HTTP_X_RC_VCS_STREAM_CALL_CONTEXT'])
578 578 call_context = msgpack.unpackb(packed_cc)
579 579
580 580 repo_path = call_context['repo_path']
581 581 repo_name = call_context['repo_name']
582 582 config = call_context['repo_config']
583 583
584 584 app = scm_app.create_hg_wsgi_app(
585 585 repo_path, repo_name, config)
586 586
587 587 # Consistent path information for hgweb
588 588 environ['PATH_INFO'] = call_context['path_info']
589 589 environ['REPO_NAME'] = repo_name
590 590 self.set_env_from_config(environ, config)
591 591
592 592 log.debug('http-app: starting app handler '
593 593 'with %s and process request', app)
594 594 return app(environ, ResponseFilter(start_response))
595 595 return _hg_stream
596 596
597 597 def git_stream(self):
598 598 if self._use_echo_app:
599 599 @wsgiapp
600 600 def _git_stream(environ, start_response):
601 601 app = EchoApp('fake_path', 'fake_name', None)
602 602 return app(environ, start_response)
603 603 return _git_stream
604 604 else:
605 605 @wsgiapp
606 606 def _git_stream(environ, start_response):
607 607 log.debug('http-app: handling git stream')
608 608
609 609 packed_cc = base64.b64decode(environ['HTTP_X_RC_VCS_STREAM_CALL_CONTEXT'])
610 610 call_context = msgpack.unpackb(packed_cc)
611 611
612 612 repo_path = call_context['repo_path']
613 613 repo_name = call_context['repo_name']
614 614 config = call_context['repo_config']
615 615
616 616 environ['PATH_INFO'] = call_context['path_info']
617 617 self.set_env_from_config(environ, config)
618 618
619 619 content_type = environ.get('CONTENT_TYPE', '')
620 620
621 621 path = environ['PATH_INFO']
622 622 is_lfs_request = GIT_LFS_CONTENT_TYPE in content_type
623 623 log.debug(
624 624 'LFS: Detecting if request `%s` is LFS server path based '
625 625 'on content type:`%s`, is_lfs:%s',
626 626 path, content_type, is_lfs_request)
627 627
628 628 if not is_lfs_request:
629 629 # fallback detection by path
630 630 if GIT_LFS_PROTO_PAT.match(path):
631 631 is_lfs_request = True
632 632 log.debug(
633 633 'LFS: fallback detection by path of: `%s`, is_lfs:%s',
634 634 path, is_lfs_request)
635 635
636 636 if is_lfs_request:
637 637 app = scm_app.create_git_lfs_wsgi_app(
638 638 repo_path, repo_name, config)
639 639 else:
640 640 app = scm_app.create_git_wsgi_app(
641 641 repo_path, repo_name, config)
642 642
643 643 log.debug('http-app: starting app handler '
644 644 'with %s and processing request', app)
645 645
646 646 return app(environ, start_response)
647 647
648 648 return _git_stream
649 649
650 650 def handle_vcs_exception(self, exception, request):
651 651 _vcs_kind = getattr(exception, '_vcs_kind', '')
652 652 if _vcs_kind == 'repo_locked':
653 653 # Get custom repo-locked status code if present.
654 654 status_code = request.headers.get('X-RC-Locked-Status-Code')
655 655 return HTTPRepoLocked(
656 656 title=str(exception), status_code=status_code)
657 657
658 658 elif _vcs_kind == 'repo_branch_protected':
659 659 # Get custom repo-branch-protected status code if present.
660 660 return HTTPRepoBranchProtected(title=str(exception))
661 661
662 662 exc_info = request.exc_info
663 663 store_exception(id(exc_info), exc_info)
664 664
665 665 traceback_info = 'unavailable'
666 666 if request.exc_info:
667 667 exc_type, exc_value, exc_tb = request.exc_info
668 668 traceback_info = ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))
669 669
670 670 log.error(
671 671 'error occurred handling this request for path: %s, \n tb: %s',
672 672 request.path, traceback_info)
673 673
674 674 statsd = request.registry.statsd
675 675 if statsd:
676 exc_type = "{}.{}".format(exception.__class__.__module__, exception.__class__.__name__)
676 exc_type = f"{exception.__class__.__module__}.{exception.__class__.__name__}"
677 677 statsd.incr('vcsserver_exception_total',
678 tags=["type:{}".format(exc_type)])
678 tags=[f"type:{exc_type}"])
679 679 raise exception
680 680
681 681
682 682 class ResponseFilter(object):
683 683
684 684 def __init__(self, start_response):
685 685 self._start_response = start_response
686 686
687 687 def __call__(self, status, response_headers, exc_info=None):
688 688 headers = tuple(
689 689 (h, v) for h, v in response_headers
690 690 if not wsgiref.util.is_hop_by_hop(h))
691 691 return self._start_response(status, headers, exc_info)
692 692
693 693
694 694 def sanitize_settings_and_apply_defaults(global_config, settings):
695 695 global_settings_maker = SettingsMaker(global_config)
696 696 settings_maker = SettingsMaker(settings)
697 697
698 698 settings_maker.make_setting('logging.autoconfigure', False, parser='bool')
699 699
700 700 logging_conf = os.path.join(os.path.dirname(global_config.get('__file__')), 'logging.ini')
701 701 settings_maker.enable_logging(logging_conf)
702 702
703 703 # Default includes, possible to change as a user
704 704 pyramid_includes = settings_maker.make_setting('pyramid.includes', [], parser='list:newline')
705 705 log.debug("Using the following pyramid.includes: %s", pyramid_includes)
706 706
707 707 settings_maker.make_setting('__file__', global_config.get('__file__'))
708 708
709 709 settings_maker.make_setting('pyramid.default_locale_name', 'en')
710 710 settings_maker.make_setting('locale', 'en_US.UTF-8')
711 711
712 712 settings_maker.make_setting('core.binary_dir', '')
713 713
714 714 temp_store = tempfile.gettempdir()
715 715 default_cache_dir = os.path.join(temp_store, 'rc_cache')
716 716 # save default cache dir and use it for all backends later.
717 717 default_cache_dir = settings_maker.make_setting(
718 718 'cache_dir',
719 719 default=default_cache_dir, default_when_empty=True,
720 720 parser='dir:ensured')
721 721
722 722 # exception store cache
723 723 settings_maker.make_setting(
724 724 'exception_tracker.store_path',
725 725 default=os.path.join(default_cache_dir, 'exc_store'), default_when_empty=True,
726 726 parser='dir:ensured'
727 727 )
728 728
729 729 # repo_object cache defaults
730 730 settings_maker.make_setting(
731 731 'rc_cache.repo_object.backend',
732 732 default='dogpile.cache.rc.file_namespace',
733 733 parser='string')
734 734 settings_maker.make_setting(
735 735 'rc_cache.repo_object.expiration_time',
736 736 default=30 * 24 * 60 * 60,  # 30 days
737 737 parser='int')
738 738 settings_maker.make_setting(
739 739 'rc_cache.repo_object.arguments.filename',
740 740 default=os.path.join(default_cache_dir, 'vcsserver_cache_repo_object.db'),
741 741 parser='string')
742 742
743 743 # statsd
744 744 settings_maker.make_setting('statsd.enabled', False, parser='bool')
745 745 settings_maker.make_setting('statsd.statsd_host', 'statsd-exporter', parser='string')
746 746 settings_maker.make_setting('statsd.statsd_port', 9125, parser='int')
747 747 settings_maker.make_setting('statsd.statsd_prefix', '')
748 748 settings_maker.make_setting('statsd.statsd_ipv6', False, parser='bool')
749 749
750 750 settings_maker.env_expand()
751 751
752 752
753 753 def main(global_config, **settings):
754 754 start_time = time.time()
755 755 log.info('Pyramid app config starting')
756 756
757 757 if MercurialFactory:
758 758 hgpatches.patch_largefiles_capabilities()
759 759 hgpatches.patch_subrepo_type_mapping()
760 760
761 761 # Fill in and sanitize the defaults & do ENV expansion
762 762 sanitize_settings_and_apply_defaults(global_config, settings)
763 763
764 764 # init and bootstrap StatsdClient
765 765 StatsdClient.setup(settings)
766 766
767 767 pyramid_app = HTTPApplication(settings=settings, global_config=global_config).wsgi_app()
768 768 total_time = time.time() - start_time
769 769 log.info('Pyramid app `%s` created and configured in %.2fs',
770 770 getattr(pyramid_app, 'func_name', 'pyramid_app'), total_time)
771 771 return pyramid_app
772 772
773 773
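Note on the stream handlers above: the call context travels in a single base64-encoded msgpack header. A minimal sketch of how a caller could build that header; the header name and dict keys are taken from the handlers, while the concrete values are made up:

    import base64
    import msgpack

    call_context = {
        'repo_path': '/srv/repos/example',   # hypothetical values; real ones
        'repo_name': 'example',              # are filled in by the client
        'repo_config': [('rhodecode', 'config', '{"username": "admin"}')],
        'path_info': '/example',
    }
    packed = base64.b64encode(msgpack.packb(call_context))
    headers = {'X-RC-VCS-STREAM-CALL-CONTEXT': packed}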
@@ -1,174 +1,172 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 2 # Copyright (C) 2014-2020 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
8 6 # the Free Software Foundation; either version 3 of the License, or
9 7 # (at your option) any later version.
10 8 #
11 9 # This program is distributed in the hope that it will be useful,
12 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 12 # GNU General Public License for more details.
15 13 #
16 14 # You should have received a copy of the GNU General Public License
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20 18
21 19 import os
22 20 import time
23 21 import datetime
24 22 import msgpack
25 23 import logging
26 24 import traceback
27 25 import tempfile
28 26
29 27 log = logging.getLogger(__name__)
30 28
31 29 # NOTE: Any changes should be synced with exc_tracking at rhodecode.lib.exc_tracking
32 30 global_prefix = 'vcsserver'
33 31 exc_store_dir_name = 'rc_exception_store_v1'
34 32
35 33
36 34 def exc_serialize(exc_id, tb, exc_type):
37 35
38 36 data = {
39 37 'version': 'v1',
40 38 'exc_id': exc_id,
41 39 'exc_utc_date': datetime.datetime.utcnow().isoformat(),
42 40 'exc_timestamp': repr(time.time()),
43 41 'exc_message': tb,
44 42 'exc_type': exc_type,
45 43 }
46 44 return msgpack.packb(data), data
47 45
48 46
49 47 def exc_unserialize(tb):
50 48 return msgpack.unpackb(tb)
51 49
52 50
53 51 def get_exc_store():
54 52 """
55 53 Get the exception store, creating it if it does not exist
56 54 """
57 55 import vcsserver as app
58 56
59 57 exc_store_dir = app.CONFIG.get('exception_tracker.store_path', '') or tempfile.gettempdir()
60 58 _exc_store_path = os.path.join(exc_store_dir, exc_store_dir_name)
61 59
62 60 _exc_store_path = os.path.abspath(_exc_store_path)
63 61 if not os.path.isdir(_exc_store_path):
64 62 os.makedirs(_exc_store_path)
65 63 log.debug('Initializing exceptions store at %s', _exc_store_path)
66 64 return _exc_store_path
67 65
68 66
69 67 def _store_exception(exc_id, exc_info, prefix, request_path=''):
70 68 exc_type, exc_value, exc_traceback = exc_info
71 69
72 70 tb = ''.join(traceback.format_exception(
73 71 exc_type, exc_value, exc_traceback, None))
74 72
75 73 detailed_tb = getattr(exc_value, '_org_exc_tb', None)
76 74
77 75 if detailed_tb:
78 76 remote_tb = detailed_tb
79 77 if isinstance(detailed_tb, str):
80 78 remote_tb = [detailed_tb]
81 79
82 80 tb += (
83 81 '\n+++ BEG SOURCE EXCEPTION +++\n\n'
84 82 '{}\n'
85 83 '+++ END SOURCE EXCEPTION +++\n'
86 84 ''.format('\n'.join(remote_tb))
87 85 )
88 86
89 87 # Drop the local so remote_tb does not also appear in the stack frame
90 88 del remote_tb
91 89
92 90 exc_type_name = exc_type.__name__
93 91 exc_store_path = get_exc_store()
94 92 exc_data, org_data = exc_serialize(exc_id, tb, exc_type_name)
95 93 exc_pref_id = '{}_{}_{}'.format(exc_id, prefix, org_data['exc_timestamp'])
96 94 if not os.path.isdir(exc_store_path):
97 95 os.makedirs(exc_store_path)
98 96 stored_exc_path = os.path.join(exc_store_path, exc_pref_id)
99 97 with open(stored_exc_path, 'wb') as f:
100 98 f.write(exc_data)
101 99 log.debug('Stored generated exception %s as: %s', exc_id, stored_exc_path)
102 100
103 101 log.error(
104 102 'error occurred handling this request.\n'
105 103 'Path: `%s`, tb: %s',
106 104 request_path, tb)
107 105
108 106
109 107 def store_exception(exc_id, exc_info, prefix=global_prefix, request_path=''):
110 108 """
111 109 Example usage::
112 110
113 111 exc_info = sys.exc_info()
114 112 store_exception(id(exc_info), exc_info)
115 113 """
116 114
117 115 try:
118 116 _store_exception(exc_id=exc_id, exc_info=exc_info, prefix=prefix,
119 117 request_path=request_path)
120 118 except Exception:
121 119 log.exception('Failed to store exception `%s` information', exc_id)
122 120 # this must never fail; if it did, it would crash the server badly.
123 121 pass
124 122
125 123
126 124 def _find_exc_file(exc_id, prefix=global_prefix):
127 125 exc_store_path = get_exc_store()
128 126 if prefix:
129 exc_id = '{}_{}'.format(exc_id, prefix)
127 exc_id = f'{exc_id}_{prefix}'
130 128 else:
131 129 # search without a prefix
132 exc_id = '{}'.format(exc_id)
130 exc_id = f'{exc_id}'
133 131
134 132 # we need to search the store for such start pattern as above
135 133 for fname in os.listdir(exc_store_path):
136 134 if fname.startswith(exc_id):
137 135 exc_id = os.path.join(exc_store_path, fname)
138 136 break
139 137 continue
140 138 else:
141 139 exc_id = None
142 140
143 141 return exc_id
144 142
145 143
146 144 def _read_exception(exc_id, prefix):
147 145 exc_id_file_path = _find_exc_file(exc_id=exc_id, prefix=prefix)
148 146 if exc_id_file_path:
149 147 with open(exc_id_file_path, 'rb') as f:
150 148 return exc_unserialize(f.read())
151 149 else:
152 150 log.debug('Exception file `%s` not found', exc_id_file_path)
153 151 return None
154 152
155 153
156 154 def read_exception(exc_id, prefix=global_prefix):
157 155 try:
158 156 return _read_exception(exc_id=exc_id, prefix=prefix)
159 157 except Exception:
160 158 log.exception('Failed to read exception `%s` information', exc_id)
161 159 # this must never fail; if it did, it would crash the server badly.
162 160 return None
163 161
164 162
165 163 def delete_exception(exc_id, prefix=global_prefix):
166 164 try:
167 165 exc_id_file_path = _find_exc_file(exc_id, prefix=prefix)
168 166 if exc_id_file_path:
169 167 os.remove(exc_id_file_path)
170 168
171 169 except Exception:
172 170 log.exception('Failed to remove exception `%s` information', exc_id)
173 171 # this must never fail; if it did, it would crash the server badly.
174 172 pass
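
A minimal round-trip sketch for the store above, assuming the default store path resolution (exception_tracker.store_path, falling back to the temp dir):

    import sys

    try:
        1 / 0
    except ZeroDivisionError:
        exc_info = sys.exc_info()
        exc_id = id(exc_info)
        store_exception(exc_id, exc_info)

    data = read_exception(exc_id)   # dict with version/exc_id/exc_utc_date/... keys
    delete_exception(exc_id)        # removes the stored file again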
@@ -1,65 +1,63 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 2 # Copyright (C) 2014-2020 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
8 6 # the Free Software Foundation; either version 3 of the License, or
9 7 # (at your option) any later version.
10 8 #
11 9 # This program is distributed in the hope that it will be useful,
12 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 12 # GNU General Public License for more details.
15 13 #
16 14 # You should have received a copy of the GNU General Public License
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20 18
21 19 import logging
22 20
23 21 from repoze.lru import LRUCache
24 22
25 23 from vcsserver.str_utils import safe_str
26 24
27 25 log = logging.getLogger(__name__)
28 26
29 27
30 28 class LRUDict(LRUCache):
31 29 """
32 30 Wrapper to provide partial dict access
33 31 """
34 32
35 33 def __setitem__(self, key, value):
36 34 return self.put(key, value)
37 35
38 36 def __getitem__(self, key):
39 37 return self.get(key)
40 38
41 39 def __contains__(self, key):
42 40 return bool(self.get(key))
43 41
44 42 def __delitem__(self, key):
45 43 del self.data[key]
46 44
47 45 def keys(self):
48 46 return list(self.data.keys())
49 47
50 48
51 49 class LRUDictDebug(LRUDict):
52 50 """
53 51 Wrapper to provide some debug options
54 52 """
55 53 def _report_keys(self):
56 elems_cnt = '%s/%s' % (len(list(self.keys())), self.size)
54 elems_cnt = '{}/{}'.format(len(list(self.keys())), self.size)
57 55 # trick so pformat prints it more nicely
58 56 fmt = '\n'
59 57 for cnt, elem in enumerate(self.keys()):
60 fmt += '%s - %s\n' % (cnt+1, safe_str(elem))
58 fmt += '{} - {}\n'.format(cnt+1, safe_str(elem))
61 59 log.debug('current LRU keys (%s):%s', elems_cnt, fmt)
62 60
63 61 def __getitem__(self, key):
64 62 self._report_keys()
65 63 return self.get(key)
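
For reference, a small usage sketch of the wrapper above. Note that __contains__ is based on bool(self.get(key)), so a key holding a falsy value (0, '', None) reports as missing:

    cache = LRUDict(100)            # keeps at most 100 entries
    cache['commit'] = 'deadbeef'
    assert 'commit' in cache
    cache['empty'] = ''
    assert 'empty' not in cache     # falsy value fails the membership test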
@@ -1,110 +1,108 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 2 # Copyright (C) 2014-2020 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
8 6 # the Free Software Foundation; either version 3 of the License, or
9 7 # (at your option) any later version.
10 8 #
11 9 # This program is distributed in the hope that it will be useful,
12 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 12 # GNU General Public License for more details.
15 13 #
16 14 # You should have received a copy of the GNU General Public License
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20 18 import logging
21 19 import threading
22 20
23 21 from dogpile.cache import register_backend
24 22
25 23 from . import region_meta
26 24 from .utils import (
27 25 backend_key_generator,
28 26 clear_cache_namespace,
29 27 get_default_cache_settings,
30 28 get_or_create_region,
31 29 make_region,
32 30 str2bool,
33 31 )
34 32
35 33 module_name = 'vcsserver'
36 34
37 35 register_backend(
38 36 "dogpile.cache.rc.memory_lru", f"{module_name}.lib.rc_cache.backends",
39 37 "LRUMemoryBackend")
40 38
41 39 register_backend(
42 40 "dogpile.cache.rc.file_namespace", f"{module_name}.lib.rc_cache.backends",
43 41 "FileNamespaceBackend")
44 42
45 43 register_backend(
46 44 "dogpile.cache.rc.redis", f"{module_name}.lib.rc_cache.backends",
47 45 "RedisPickleBackend")
48 46
49 47 register_backend(
50 48 "dogpile.cache.rc.redis_msgpack", f"{module_name}.lib.rc_cache.backends",
51 49 "RedisMsgPackBackend")
52 50
53 51
54 52 log = logging.getLogger(__name__)
55 53
56 54
57 55 def async_creation_runner(cache, somekey, creator, mutex):
58 56
59 57 def runner():
60 58 try:
61 59 value = creator()
62 60 cache.set(somekey, value)
63 61 finally:
64 62 mutex.release()
65 63
66 64 thread = threading.Thread(target=runner)
67 65 thread.start()
68 66
69 67
70 68 def configure_dogpile_cache(settings):
71 69 cache_dir = settings.get('cache_dir')
72 70 if cache_dir:
73 71 region_meta.dogpile_config_defaults['cache_dir'] = cache_dir
74 72
75 73 rc_cache_data = get_default_cache_settings(settings, prefixes=['rc_cache.'])
76 74
77 75 # inspect available namespaces
78 76 avail_regions = set()
79 77 for key in rc_cache_data.keys():
80 78 namespace_name = key.split('.', 1)[0]
81 79 if namespace_name in avail_regions:
82 80 continue
83 81
84 82 avail_regions.add(namespace_name)
85 83 log.debug('dogpile: found following cache regions: %s', namespace_name)
86 84
87 85 new_region = make_region(
88 86 name=namespace_name,
89 87 function_key_generator=None,
90 88 async_creation_runner=None
91 89 )
92 90
93 91 new_region.configure_from_config(settings, f'rc_cache.{namespace_name}.')
94 92 new_region.function_key_generator = backend_key_generator(new_region.actual_backend)
95 93
96 94 async_creator = str2bool(settings.pop(f'rc_cache.{namespace_name}.async_creator', 'false'))
97 95 if async_creator:
98 96 log.debug('configuring region %s with async creator', new_region)
99 97 new_region.async_creation_runner = async_creation_runner
100 98
101 99 if log.isEnabledFor(logging.DEBUG):
102 100 region_args = dict(backend=new_region.actual_backend,
103 101 region_invalidator=new_region.region_invalidator.__class__)
104 102 log.debug('dogpile: registering a new region `%s` %s', namespace_name, region_args)
105 103
106 104 region_meta.dogpile_cache_regions[namespace_name] = new_region
107 105
108 106
109 107 def includeme(config):
110 108 configure_dogpile_cache(config.registry.settings)
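
configure_dogpile_cache derives one region per rc_cache.<name>.* settings prefix. A sketch of settings that would register a repo_object region; the key names mirror the defaults applied in sanitize_settings_and_apply_defaults, the values are illustrative:

    settings = {
        'cache_dir': '/tmp/rc_cache',
        'rc_cache.repo_object.backend': 'dogpile.cache.rc.file_namespace',
        'rc_cache.repo_object.expiration_time': 30 * 24 * 60 * 60,
        'rc_cache.repo_object.arguments.filename': '/tmp/rc_cache/repo_object.db',
    }
    configure_dogpile_cache(settings)
    region = region_meta.dogpile_cache_regions['repo_object']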
@@ -1,261 +1,261 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import errno
19 19 import fcntl
20 20 import functools
21 21 import logging
22 22 import os
23 23 import pickle
24 24 #import time
25 25
26 26 #import gevent
27 27 import msgpack
28 28 import redis
29 29
30 30 flock_org = fcntl.flock
31 31 from typing import Union
32 32
33 33 from dogpile.cache.api import Deserializer, Serializer
34 34 from dogpile.cache.backends import file as file_backend
35 35 from dogpile.cache.backends import memory as memory_backend
36 36 from dogpile.cache.backends import redis as redis_backend
37 37 from dogpile.cache.backends.file import FileLock
38 38 from dogpile.cache.util import memoized_property
39 39
40 40 from vcsserver.lib.memory_lru_dict import LRUDict, LRUDictDebug
41 41 from vcsserver.str_utils import safe_bytes, safe_str
42 42 from vcsserver.type_utils import str2bool
43 43
44 44 _default_max_size = 1024
45 45
46 46 log = logging.getLogger(__name__)
47 47
48 48
49 49 class LRUMemoryBackend(memory_backend.MemoryBackend):
50 50 key_prefix = 'lru_mem_backend'
51 51 pickle_values = False
52 52
53 53 def __init__(self, arguments):
54 54 self.max_size = arguments.pop('max_size', _default_max_size)
55 55
56 56 LRUDictClass = LRUDict
57 57 if arguments.pop('log_key_count', None):
58 58 LRUDictClass = LRUDictDebug
59 59
60 60 arguments['cache_dict'] = LRUDictClass(self.max_size)
61 super(LRUMemoryBackend, self).__init__(arguments)
61 super().__init__(arguments)
62 62
63 63 def __repr__(self):
64 64 return f'{self.__class__}(maxsize=`{self.max_size}`)'
65 65
66 66 def __str__(self):
67 67 return self.__repr__()
68 68
69 69 def delete(self, key):
70 70 try:
71 71 del self._cache[key]
72 72 except KeyError:
73 73 # we don't care if key isn't there at deletion
74 74 pass
75 75
76 76 def delete_multi(self, keys):
77 77 for key in keys:
78 78 self.delete(key)
79 79
80 80
81 81 class PickleSerializer:
82 serializer: Union[None, Serializer] = staticmethod( # type: ignore
82 serializer: None | Serializer = staticmethod( # type: ignore
83 83 functools.partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL)
84 84 )
85 deserializer: Union[None, Deserializer] = staticmethod( # type: ignore
85 deserializer: None | Deserializer = staticmethod( # type: ignore
86 86 functools.partial(pickle.loads)
87 87 )
88 88
89 89
90 90 class MsgPackSerializer(object):
91 serializer: Union[None, Serializer] = staticmethod( # type: ignore
91 serializer: None | Serializer = staticmethod( # type: ignore
92 92 msgpack.packb
93 93 )
94 deserializer: Union[None, Deserializer] = staticmethod( # type: ignore
94 deserializer: None | Deserializer = staticmethod( # type: ignore
95 95 functools.partial(msgpack.unpackb, use_list=False)
96 96 )
97 97
98 98
99 99 class CustomLockFactory(FileLock):
100 100
101 101 pass
102 102
103 103
104 104 class FileNamespaceBackend(PickleSerializer, file_backend.DBMBackend):
105 105 key_prefix = 'file_backend'
106 106
107 107 def __init__(self, arguments):
108 108 arguments['lock_factory'] = CustomLockFactory
109 109 db_file = arguments.get('filename')
110 110
111 111 log.debug('initializing cache-backend=%s db in %s', self.__class__.__name__, db_file)
112 112 db_file_dir = os.path.dirname(db_file)
113 113 if not os.path.isdir(db_file_dir):
114 114 os.makedirs(db_file_dir)
115 115
116 116 try:
117 super(FileNamespaceBackend, self).__init__(arguments)
117 super().__init__(arguments)
118 118 except Exception:
119 119 log.exception('Failed to initialize db at: %s', db_file)
120 120 raise
121 121
122 122 def __repr__(self):
123 123 return f'{self.__class__}(file=`{self.filename}`)'
124 124
125 125 def __str__(self):
126 126 return self.__repr__()
127 127
128 128 def list_keys(self, prefix: bytes = b''):
129 129 prefix = b'%b:%b' % (safe_bytes(self.key_prefix), safe_bytes(prefix))
130 130
131 131 def cond(dbm_key: bytes):
132 132 if not prefix:
133 133 return True
134 134
135 135 if dbm_key.startswith(prefix):
136 136 return True
137 137 return False
138 138
139 139 with self._dbm_file(True) as dbm:
140 140 try:
141 141 return list(filter(cond, dbm.keys()))
142 142 except Exception:
143 143 log.error('Failed to fetch DBM keys from DB: %s', self.get_store())
144 144 raise
145 145
146 146 def get_store(self):
147 147 return self.filename
148 148
149 149
150 150 class BaseRedisBackend(redis_backend.RedisBackend):
151 151 key_prefix = ''
152 152
153 153 def __init__(self, arguments):
154 154 self.db_conn = arguments.get('host', '') or arguments.get('url', '') or 'redis-host'
155 super(BaseRedisBackend, self).__init__(arguments)
155 super().__init__(arguments)
156 156
157 157 self._lock_timeout = self.lock_timeout
158 158 self._lock_auto_renewal = str2bool(arguments.pop("lock_auto_renewal", True))
159 159
160 160 if self._lock_auto_renewal and not self._lock_timeout:
161 161 # set default timeout for auto_renewal
162 162 self._lock_timeout = 30
163 163
164 164 def __repr__(self):
165 165 return f'{self.__class__}(conn=`{self.db_conn}`)'
166 166
167 167 def __str__(self):
168 168 return self.__repr__()
169 169
170 170 def _create_client(self):
171 171 args = {}
172 172
173 173 if self.url is not None:
174 174 args.update(url=self.url)
175 175
176 176 else:
177 177 args.update(
178 178 host=self.host, password=self.password,
179 179 port=self.port, db=self.db
180 180 )
181 181
182 182 connection_pool = redis.ConnectionPool(**args)
183 183 self.writer_client = redis.StrictRedis(
184 184 connection_pool=connection_pool
185 185 )
186 186 self.reader_client = self.writer_client
187 187
188 188 def list_keys(self, prefix=''):
189 189 prefix = f'{self.key_prefix}:{prefix}*'
190 190 return self.reader_client.keys(prefix)
191 191
192 192 def get_store(self):
193 193 return self.reader_client.connection_pool
194 194
195 195 def get_mutex(self, key):
196 196 if self.distributed_lock:
197 197 lock_key = f'_lock_{safe_str(key)}'
198 198 return get_mutex_lock(
199 199 self.writer_client, lock_key,
200 200 self._lock_timeout,
201 201 auto_renewal=self._lock_auto_renewal
202 202 )
203 203 else:
204 204 return None
205 205
206 206
207 207 class RedisPickleBackend(PickleSerializer, BaseRedisBackend):
208 208 key_prefix = 'redis_pickle_backend'
209 209 pass
210 210
211 211
212 212 class RedisMsgPackBackend(MsgPackSerializer, BaseRedisBackend):
213 213 key_prefix = 'redis_msgpack_backend'
214 214 pass
215 215
216 216
217 217 def get_mutex_lock(client, lock_key, lock_timeout, auto_renewal=False):
218 218 from vcsserver.lib._vendor import redis_lock
219 219
220 220 class _RedisLockWrapper(object):
221 221 """LockWrapper for redis_lock"""
222 222
223 223 @classmethod
224 224 def get_lock(cls):
225 225 return redis_lock.Lock(
226 226 redis_client=client,
227 227 name=lock_key,
228 228 expire=lock_timeout,
229 229 auto_renewal=auto_renewal,
230 230 strict=True,
231 231 )
232 232
233 233 def __repr__(self):
234 234 return f"{self.__class__.__name__}:{lock_key}"
235 235
236 236 def __str__(self):
237 237 return f"{self.__class__.__name__}:{lock_key}"
238 238
239 239 def __init__(self):
240 240 self.lock = self.get_lock()
241 241 self.lock_key = lock_key
242 242
243 243 def acquire(self, wait=True):
244 244 log.debug('Trying to acquire Redis lock for key %s', self.lock_key)
245 245 try:
246 246 acquired = self.lock.acquire(wait)
247 247 log.debug('Got lock for key %s, %s', self.lock_key, acquired)
248 248 return acquired
249 249 except redis_lock.AlreadyAcquired:
250 250 return False
251 251 except redis_lock.AlreadyStarted:
252 252 # refresh thread exists, but it also means we acquired the lock
253 253 return True
254 254
255 255 def release(self):
256 256 try:
257 257 self.lock.release()
258 258 except redis_lock.NotAcquired:
259 259 pass
260 260
261 261 return _RedisLockWrapper()
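
A usage sketch for the wrapper returned above, assuming a reachable Redis and an illustrative lock key and timeout:

    import redis

    client = redis.StrictRedis()
    mutex = get_mutex_lock(client, '_lock_repo_object', lock_timeout=30,
                           auto_renewal=True)
    if mutex.acquire(wait=True):
        try:
            pass  # critical section: compute and store the cached value
        finally:
            mutex.release()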
@@ -1,232 +1,232 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import functools
19 19 import logging
20 20 import os
21 21 import threading
22 22 import time
23 23
24 24 import decorator
25 25 from dogpile.cache import CacheRegion
26 26
27 27 from vcsserver.lib.rc_cache import region_meta
28 28 from vcsserver.str_utils import safe_bytes
29 29 from vcsserver.type_utils import str2bool
30 30 from vcsserver.utils import sha1
31 31
32 32 log = logging.getLogger(__name__)
33 33
34 34
35 35 class RhodeCodeCacheRegion(CacheRegion):
36 36
37 37 def __repr__(self):
38 38 return f'{self.__class__}(name={self.name})'
39 39
40 40 def conditional_cache_on_arguments(
41 41 self, namespace=None,
42 42 expiration_time=None,
43 43 should_cache_fn=None,
44 44 to_str=str,
45 45 function_key_generator=None,
46 46 condition=True):
47 47 """
48 48 Custom conditional decorator, that will not touch any dogpile internals if
49 49 condition isn't meet. This works a bit different from should_cache_fn
50 50 And it's faster in cases we don't ever want to compute cached values
51 51 """
52 52 expiration_time_is_callable = callable(expiration_time)
53 53
54 54 if function_key_generator is None:
55 55 function_key_generator = self.function_key_generator
56 56
57 57 def get_or_create_for_user_func(key_generator, user_func, *arg, **kw):
58 58
59 59 if not condition:
60 60 log.debug('Calling un-cached method:%s', user_func.__name__)
61 61 start = time.time()
62 62 result = user_func(*arg, **kw)
63 63 total = time.time() - start
64 64 log.debug('un-cached method:%s took %.4fs', user_func.__name__, total)
65 65 return result
66 66
67 67 key = key_generator(*arg, **kw)
68 68
69 69 timeout = expiration_time() if expiration_time_is_callable \
70 70 else expiration_time
71 71
72 72 log.debug('Calling cached method:`%s`', user_func.__name__)
73 73 return self.get_or_create(key, user_func, timeout, should_cache_fn, (arg, kw))
74 74
75 75 def cache_decorator(user_func):
76 76 if to_str is str:
77 77 # backwards compatible
78 78 key_generator = function_key_generator(namespace, user_func)
79 79 else:
80 80 key_generator = function_key_generator(namespace, user_func, to_str=to_str)
81 81
82 82 def refresh(*arg, **kw):
83 83 """
84 84 Like invalidate, but regenerates the value instead
85 85 """
86 86 key = key_generator(*arg, **kw)
87 87 value = user_func(*arg, **kw)
88 88 self.set(key, value)
89 89 return value
90 90
91 91 def invalidate(*arg, **kw):
92 92 key = key_generator(*arg, **kw)
93 93 self.delete(key)
94 94
95 95 def set_(value, *arg, **kw):
96 96 key = key_generator(*arg, **kw)
97 97 self.set(key, value)
98 98
99 99 def get(*arg, **kw):
100 100 key = key_generator(*arg, **kw)
101 101 return self.get(key)
102 102
103 103 user_func.set = set_
104 104 user_func.invalidate = invalidate
105 105 user_func.get = get
106 106 user_func.refresh = refresh
107 107 user_func.key_generator = key_generator
108 108 user_func.original = user_func
109 109
110 110 # Use `decorate` to preserve the signature of :param:`user_func`.
111 111 return decorator.decorate(user_func, functools.partial(
112 112 get_or_create_for_user_func, key_generator))
113 113
114 114 return cache_decorator
115 115
116 116
117 117 def make_region(*arg, **kw):
118 118 return RhodeCodeCacheRegion(*arg, **kw)
119 119
120 120
121 121 def get_default_cache_settings(settings, prefixes=None):
122 122 prefixes = prefixes or []
123 123 cache_settings = {}
124 124 for key in settings.keys():
125 125 for prefix in prefixes:
126 126 if key.startswith(prefix):
127 127 name = key.split(prefix)[1].strip()
128 128 val = settings[key]
129 129 if isinstance(val, str):
130 130 val = val.strip()
131 131 cache_settings[name] = val
132 132 return cache_settings
133 133
134 134
135 135 def compute_key_from_params(*args):
136 136 """
137 137 Helper to compute key from given params to be used in cache manager
138 138 """
139 139 return sha1(safe_bytes("_".join(map(str, args))))
140 140
141 141
142 142 def backend_key_generator(backend):
143 143 """
144 144 Special wrapper that also sends over the backend to the key generator
145 145 """
146 146 def wrapper(namespace, fn):
147 147 return key_generator(backend, namespace, fn)
148 148 return wrapper
149 149
150 150
151 151 def key_generator(backend, namespace, fn):
152 152 func_name = fn.__name__
153 153
154 154 def generate_key(*args):
155 155 backend_prefix = getattr(backend, 'key_prefix', None) or 'backend_prefix'
156 156 namespace_pref = namespace or 'default_namespace'
157 157 arg_key = compute_key_from_params(*args)
158 158 final_key = f"{backend_prefix}:{namespace_pref}:{func_name}_{arg_key}"
159 159
160 160 return final_key
161 161
162 162 return generate_key
163 163
164 164
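The generated keys therefore have a stable, fully qualified shape. A quick illustration; the backend prefix, namespace, and function name are hypothetical:

    arg_key = compute_key_from_params('repo_id', 42)  # sha1 of b'repo_id_42'
    final_key = f'file_backend:repo_namespace:get_commit_{arg_key}'
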
165 165 def get_or_create_region(region_name, region_namespace: str = None):
166 166 from vcsserver.lib.rc_cache.backends import FileNamespaceBackend
167 167
168 168 region_obj = region_meta.dogpile_cache_regions.get(region_name)
169 169 if not region_obj:
170 170 reg_keys = list(region_meta.dogpile_cache_regions.keys())
171 raise EnvironmentError(f'Region `{region_name}` not found in configured regions: {reg_keys}.')
171 raise OSError(f'Region `{region_name}` not found in configured regions: {reg_keys}.')
172 172
173 173 region_uid_name = f'{region_name}:{region_namespace}'
174 174
175 175 if isinstance(region_obj.actual_backend, FileNamespaceBackend):
176 176 if not region_namespace:
177 177 raise ValueError(f'{FileNamespaceBackend} used requires to specify region_namespace param')
178 178
179 179 region_exist = region_meta.dogpile_cache_regions.get(region_namespace)
180 180 if region_exist:
181 181 log.debug('Using already configured region: %s', region_namespace)
182 182 return region_exist
183 183
184 184 expiration_time = region_obj.expiration_time
185 185
186 186 cache_dir = region_meta.dogpile_config_defaults['cache_dir']
187 187 namespace_cache_dir = cache_dir
188 188
189 189 # we default the namespace_cache_dir to our default cache dir.
190 190 # however, if this backend is configured with a filename= param, we prioritize that,
191 191 # so all caches within that particular region, even namespaced ones, end up in the same path
192 192 if region_obj.actual_backend.filename:
193 193 namespace_cache_dir = os.path.dirname(region_obj.actual_backend.filename)
194 194
195 195 if not os.path.isdir(namespace_cache_dir):
196 196 os.makedirs(namespace_cache_dir)
197 197 new_region = make_region(
198 198 name=region_uid_name,
199 199 function_key_generator=backend_key_generator(region_obj.actual_backend)
200 200 )
201 201
202 202 namespace_filename = os.path.join(
203 203 namespace_cache_dir, f"{region_name}_{region_namespace}.cache_db")
204 204 # special type that allows 1db per namespace
205 205 new_region.configure(
206 206 backend='dogpile.cache.rc.file_namespace',
207 207 expiration_time=expiration_time,
208 208 arguments={"filename": namespace_filename}
209 209 )
210 210
211 211 # create and save in region caches
212 212 log.debug('configuring new region: %s', region_uid_name)
213 213 region_obj = region_meta.dogpile_cache_regions[region_namespace] = new_region
214 214
215 215 return region_obj
216 216
217 217
218 218 def clear_cache_namespace(cache_region: str | RhodeCodeCacheRegion, cache_namespace_uid: str, invalidate: bool = False, hard: bool = False):
219 219 if not isinstance(cache_region, RhodeCodeCacheRegion):
220 220 cache_region = get_or_create_region(cache_region, cache_namespace_uid)
221 221
222 222 cache_keys = cache_region.backend.list_keys(prefix=cache_namespace_uid)
223 223 num_delete_keys = len(cache_keys)
224 224 if invalidate:
225 225 # NOTE: The CacheRegion.invalidate() method's default mode of
226 226 # operation is to set a timestamp local to this CacheRegion in this Python process only.
227 227 # It does not impact other Python processes or regions as the timestamp is only stored locally in memory.
228 228 cache_region.invalidate(hard=hard)
229 229 else:
230 230 if num_delete_keys:
231 231 cache_region.delete_multi(cache_keys)
232 232 return num_delete_keys
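
A usage sketch tying the helpers above together; region name, namespace and the decorated function are illustrative:

    region = get_or_create_region('repo_object', region_namespace='repo_1')

    @region.conditional_cache_on_arguments(namespace='repo_1', condition=True)
    def heavy_compute(context_uid, repo_id):
        return load_from_disk(repo_id)     # hypothetical expensive call

    value = heavy_compute('ctx', 42)       # computed once, then cached
    heavy_compute.invalidate('ctx', 42)    # helper attached by cache_decorator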
@@ -1,27 +1,25 b''
1 # -*- coding: utf-8 -*-
2
3 1 # RhodeCode VCSServer provides access to different vcs backends via network.
4 2 # Copyright (C) 2014-2020 RhodeCode GmbH
5 3 #
6 4 # This program is free software; you can redistribute it and/or modify
7 5 # it under the terms of the GNU General Public License as published by
8 6 # the Free Software Foundation; either version 3 of the License, or
9 7 # (at your option) any later version.
10 8 #
11 9 # This program is distributed in the hope that it will be useful,
12 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 12 # GNU General Public License for more details.
15 13 #
16 14 # You should have received a copy of the GNU General Public License
17 15 # along with this program; if not, write to the Free Software Foundation,
18 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 17
20 18
21 19 counter = 0
22 20
23 21
24 22 def get_request_counter(request):
25 23 global counter
26 24 counter += 1
27 25 return counter
@@ -1,70 +1,70 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 from vcsserver.lib._vendor.statsd import client_from_config
19 19
20 20
21 21 class StatsdClientNotInitialised(Exception):
22 22 pass
23 23
24 24
25 25 class _Singleton(type):
26 26 """A metaclass that creates a Singleton base class when called."""
27 27
28 28 _instances = {}
29 29
30 30 def __call__(cls, *args, **kwargs):
31 31 if cls not in cls._instances:
32 cls._instances[cls] = super(_Singleton, cls).__call__(*args, **kwargs)
32 cls._instances[cls] = super().__call__(*args, **kwargs)
33 33 return cls._instances[cls]
34 34
35 35
36 36 class Singleton(_Singleton("SingletonMeta", (object,), {})):
37 37 pass
38 38
39 39
40 40 class StatsdClientClass(Singleton):
41 41 setup_run = False
42 42 statsd_client = None
43 43 statsd = None
44 44 strict_mode_init = False
45 45
46 46 def __getattribute__(self, name):
47 47
48 48 if name.startswith("statsd"):
49 49 if self.setup_run:
50 return super(StatsdClientClass, self).__getattribute__(name)
50 return super().__getattribute__(name)
51 51 else:
52 52 if self.strict_mode_init:
53 53 raise StatsdClientNotInitialised(f"requested key was {name}")
54 54 return None
55 55
56 return super(StatsdClientClass, self).__getattribute__(name)
56 return super().__getattribute__(name)
57 57
58 58 def setup(self, settings):
59 59 """
60 60 Initialize the client
61 61 """
62 62 self.strict_mode_init = settings.pop('statsd_strict_init', False)
63 63
64 64 statsd = client_from_config(settings)
65 65 self.statsd = statsd
66 66 self.statsd_client = statsd
67 67 self.setup_run = True
68 68
69 69
70 70 StatsdClient = StatsdClientClass()
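
Because of the metaclass above, every import site shares one client object; a short sketch of the behavior:

    a = StatsdClientClass()
    b = StatsdClientClass()
    assert a is b is StatsdClient       # _Singleton returns the cached instance

    # before setup() runs, statsd* attributes resolve to None
    # (or raise StatsdClientNotInitialised in strict mode)
    assert StatsdClient.statsd_client is None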
@@ -1,414 +1,414 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 """Handles the Git smart protocol."""
19 19
20 20 import os
21 21 import socket
22 22 import logging
23 23
24 24 import dulwich.protocol
25 25 from dulwich.protocol import CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K
26 26 from webob import Request, Response, exc
27 27
28 28 from vcsserver.lib.rc_json import json
29 29 from vcsserver import hooks, subprocessio
30 30 from vcsserver.str_utils import ascii_bytes
31 31
32 32
33 33 log = logging.getLogger(__name__)
34 34
35 35
36 36 class FileWrapper(object):
37 37 """File wrapper that tracks and limits how much data is read from it."""
38 38
39 39 def __init__(self, fd, content_length):
40 40 self.fd = fd
41 41 self.content_length = content_length
42 42 self.remain = content_length
43 43
44 44 def read(self, size):
45 45 if size <= self.remain:
46 46 try:
47 47 data = self.fd.read(size)
48 48 except socket.error:
49 49 raise IOError(self)
50 50 self.remain -= size
51 51 elif self.remain:
52 52 data = self.fd.read(self.remain)
53 53 self.remain = 0
54 54 else:
55 55 data = None
56 56 return data
57 57
58 58 def __repr__(self):
59 return '<FileWrapper %s len: %s, read: %s>' % (
59 return '<FileWrapper {} len: {}, read: {}>'.format(
60 60 self.fd, self.content_length, self.content_length - self.remain
61 61 )
62 62
63 63
64 64 class GitRepository(object):
65 65 """WSGI app for handling Git smart protocol endpoints."""
66 66
67 67 git_folder_signature = frozenset(('config', 'head', 'info', 'objects', 'refs'))
68 68 commands = frozenset(('git-upload-pack', 'git-receive-pack'))
69 valid_accepts = frozenset(('application/x-{}-result'.format(c) for c in commands))
69 valid_accepts = frozenset(f'application/x-{c}-result' for c in commands)
70 70
71 71 # The last bytes are the SHA1 of the first 12 bytes.
72 72 EMPTY_PACK = (
73 73 b'PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08' +
74 74 b'\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e'
75 75 )
76 76 FLUSH_PACKET = b"0000"
77 77
78 78 SIDE_BAND_CAPS = frozenset((CAPABILITY_SIDE_BAND, CAPABILITY_SIDE_BAND_64K))
79 79
80 80 def __init__(self, repo_name, content_path, git_path, update_server_info, extras):
81 81 files = frozenset(f.lower() for f in os.listdir(content_path))
82 82 valid_dir_signature = self.git_folder_signature.issubset(files)
83 83
84 84 if not valid_dir_signature:
85 85 raise OSError('%s missing git signature' % content_path)
86 86
87 87 self.content_path = content_path
88 88 self.repo_name = repo_name
89 89 self.extras = extras
90 90 self.git_path = git_path
91 91 self.update_server_info = update_server_info
92 92
93 93 def _get_fixedpath(self, path):
94 94 """
95 95 Small fix for repo_path
96 96
97 97 :param path:
98 98 """
99 99 path = path.split(self.repo_name, 1)[-1]
100 100 if path.startswith('.git'):
101 101 # for bare repos we still get the .git prefix inside, we skip it
102 102 # here, and remove from the service command
103 103 path = path[4:]
104 104
105 105 return path.strip('/')
106 106
107 107 def inforefs(self, request, unused_environ):
108 108 """
109 109 WSGI Response producer for HTTP GET Git Smart
110 110 HTTP /info/refs request.
111 111 """
112 112
113 113 git_command = request.GET.get('service')
114 114 if git_command not in self.commands:
115 115 log.debug('command %s not allowed', git_command)
116 116 return exc.HTTPForbidden()
117 117
118 118 # please, resist the urge to add '\n' to git capture and increment
119 119 # line count by 1.
120 120 # by git docs: Documentation/technical/http-protocol.txt#L214 \n is
121 121 # a part of protocol.
122 122 # The code in Git client not only does NOT need '\n', but actually
123 123 # blows up if you sprinkle "flush" (0000) as "0001\n".
124 124 # It reads binary, per number of bytes specified.
125 125 # if you do add '\n' as part of data, count it.
126 126 server_advert = '# service=%s\n' % git_command
127 127 packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0').lower()
128 128 try:
129 129 gitenv = dict(os.environ)
130 130 # forget all configs
131 131 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
132 132 command = [self.git_path, git_command[4:], '--stateless-rpc',
133 133 '--advertise-refs', self.content_path]
134 134 out = subprocessio.SubprocessIOChunker(
135 135 command,
136 136 env=gitenv,
137 137 starting_values=[ascii_bytes(packet_len + server_advert) + self.FLUSH_PACKET],
138 138 shell=False
139 139 )
140 140 except OSError:
141 141 log.exception('Error processing command')
142 142 raise exc.HTTPExpectationFailed()
143 143
144 144 resp = Response()
145 145 resp.content_type = f'application/x-{git_command}-advertisement'
146 146 resp.charset = None
147 147 resp.app_iter = out
148 148
149 149 return resp
150 150
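The four-digit prefix built in inforefs above follows Git's pkt-line framing: the length covers the four hex digits themselves plus the payload. A worked example for the upload-pack service:

    server_advert = '# service=git-upload-pack\n'               # 26 bytes
    packet_len = hex(len(server_advert) + 4)[2:].rjust(4, '0')  # 26 + 4 = 0x1e
    assert packet_len == '001e'
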
151 151 def _get_want_capabilities(self, request):
152 152 """Read the capabilities found in the first want line of the request."""
153 153 pos = request.body_file_seekable.tell()
154 154 first_line = request.body_file_seekable.readline()
155 155 request.body_file_seekable.seek(pos)
156 156
157 157 return frozenset(
158 158 dulwich.protocol.extract_want_line_capabilities(first_line)[1])
159 159
160 160 def _build_failed_pre_pull_response(self, capabilities, pre_pull_messages):
161 161 """
162 162 Construct a response with an empty PACK file.
163 163
164 164 We use an empty PACK file, as that would trigger the failure of the pull
165 165 or clone command.
166 166
167 167 We also print in the error output a message explaining why the command
168 168 was aborted.
169 169
170 170 If additionally, the user is accepting messages we send them the output
171 171 of the pre-pull hook.
172 172
173 173 Note that for clients not supporting side-band we just send them the
174 174 empty PACK file.
175 175 """
176 176
177 177 if self.SIDE_BAND_CAPS.intersection(capabilities):
178 178 response = []
179 179 proto = dulwich.protocol.Protocol(None, response.append)
180 180 proto.write_pkt_line(dulwich.protocol.NAK_LINE)
181 181
182 182 self._write_sideband_to_proto(proto, ascii_bytes(pre_pull_messages, allow_bytes=True), capabilities)
183 183 # N.B.(skreft): Do not change the sideband channel to 3, as that
184 184 # produces a fatal error in the client:
185 185 # fatal: error in sideband demultiplexer
186 186 proto.write_sideband(
187 187 dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS,
188 188 ascii_bytes('Pre pull hook failed: aborting\n', allow_bytes=True))
189 189 proto.write_sideband(
190 190 dulwich.protocol.SIDE_BAND_CHANNEL_DATA,
191 191 ascii_bytes(self.EMPTY_PACK, allow_bytes=True))
192 192
193 193 # writes b"0000" as default
194 194 proto.write_pkt_line(None)
195 195
196 196 return response
197 197 else:
198 198 return [ascii_bytes(self.EMPTY_PACK, allow_bytes=True)]
199 199
200 200 def _build_post_pull_response(self, response, capabilities, start_message, end_message):
201 201 """
202 202 Given a list response we inject the post-pull messages.
203 203
204 204 We only inject the messages if the client supports sideband, and the
205 205 response has the format:
206 206 0008NAK\n...0000
207 207
208 208 Note that we do not check the no-progress capability as by default, git
209 209 sends it, which effectively would block all messages.
210 210 """
211 211
212 212 if not self.SIDE_BAND_CAPS.intersection(capabilities):
213 213 return response
214 214
215 215 if not start_message and not end_message:
216 216 return response
217 217
218 218 try:
219 219 iter(response)
220 220 # the object is iterable, we can continue
221 221 except TypeError:
222 222 raise TypeError(f'response must be an iterator: got {type(response)}')
223 223 if isinstance(response, (list, tuple)):
224 224 raise TypeError(f'response must be an iterator: got {type(response)}')
225 225
226 226 def injected_response():
227 227
228 228 do_loop = 1
229 229 header_injected = 0
230 230 next_item = None
231 231 has_item = False
232 232 while do_loop:
233 233
234 234 try:
235 235 next_item = next(response)
236 236 except StopIteration:
237 237 do_loop = 0
238 238
239 239 if has_item:
240 240 # last item! alter it now
241 241 if do_loop == 0 and item.endswith(self.FLUSH_PACKET):
242 242 new_response = [item[:-4]]
243 243 new_response.extend(self._get_messages(end_message, capabilities))
244 244 new_response.append(self.FLUSH_PACKET)
245 245 item = b''.join(new_response)
246 246
247 247 yield item
248 248 has_item = True
249 249 item = next_item
250 250
251 251 # alter item if it's the initial chunk
252 252 if not header_injected and item.startswith(b'0008NAK\n'):
253 253 new_response = [b'0008NAK\n']
254 254 new_response.extend(self._get_messages(start_message, capabilities))
255 255 new_response.append(item[8:])
256 256 item = b''.join(new_response)
257 257 header_injected = 1
258 258
259 259 return injected_response()
260 260
261 261 def _write_sideband_to_proto(self, proto, data, capabilities):
262 262 """
263 263 Write the data to the proto's sideband number 2 == SIDE_BAND_CHANNEL_PROGRESS
264 264
265 265 We do not use dulwich's write_sideband directly as it only supports
266 266 side-band-64k.
267 267 """
268 268 if not data:
269 269 return
270 270
271 271 # N.B.(skreft): The values below are explained in the pack protocol
272 272 # documentation, section Packfile Data.
273 273 # https://github.com/git/git/blob/master/Documentation/technical/pack-protocol.txt
274 274 if CAPABILITY_SIDE_BAND_64K in capabilities:
275 275 chunk_size = 65515
276 276 elif CAPABILITY_SIDE_BAND in capabilities:
277 277 chunk_size = 995
278 278 else:
279 279 return
280 280
281 281 chunker = (data[i:i + chunk_size] for i in range(0, len(data), chunk_size))
282 282
283 283 for chunk in chunker:
284 284 proto.write_sideband(dulwich.protocol.SIDE_BAND_CHANNEL_PROGRESS, ascii_bytes(chunk, allow_bytes=True))
285 285
286 286 def _get_messages(self, data, capabilities):
287 287 """Return a list with packets for sending data in sideband number 2."""
288 288 response = []
289 289 proto = dulwich.protocol.Protocol(None, response.append)
290 290
291 291 self._write_sideband_to_proto(proto, data, capabilities)
292 292
293 293 return response
294 294
295 295 def backend(self, request, environ):
296 296 """
297 297 WSGI Response producer for HTTP POST Git Smart HTTP requests.
298 298 Reads commands and data from HTTP POST's body.
299 299 returns an iterator obj with contents of git command's
300 300 response to stdout
301 301 """
302 302 # TODO(skreft): think how we could detect an HTTPLockedException, as
303 303 # we probably want to have the same mechanism used by mercurial and
304 304 # simplevcs.
305 305 # For that we would need to parse the output of the command looking for
306 306 # some signs of the HTTPLockedError, parse the data and reraise it in
307 307 # pygrack. However, that would interfere with the streaming.
308 308 #
309 309 # Now the output of a blocked push is:
310 310 # Pushing to http://test_regular:test12@127.0.0.1:5001/vcs_test_git
311 311 # POST git-receive-pack (1047 bytes)
312 312 # remote: ERROR: Repository `vcs_test_git` locked by user `test_admin`. Reason:`lock_auto`
313 313 # To http://test_regular:test12@127.0.0.1:5001/vcs_test_git
314 314 # ! [remote rejected] master -> master (pre-receive hook declined)
315 315 # error: failed to push some refs to 'http://test_regular:test12@127.0.0.1:5001/vcs_test_git'
316 316
317 317 git_command = self._get_fixedpath(request.path_info)
318 318 if git_command not in self.commands:
319 319 log.debug('command %s not allowed', git_command)
320 320 return exc.HTTPForbidden()
321 321
322 322 capabilities = None
323 323 if git_command == 'git-upload-pack':
324 324 capabilities = self._get_want_capabilities(request)
325 325
326 326 if 'CONTENT_LENGTH' in environ:
327 327 inputstream = FileWrapper(request.body_file_seekable,
328 328 request.content_length)
329 329 else:
330 330 inputstream = request.body_file_seekable
331 331
332 332 resp = Response()
333 resp.content_type = 'application/x-{}-result'.format(git_command)
333 resp.content_type = f'application/x-{git_command}-result'
334 334 resp.charset = None
335 335
336 336 pre_pull_messages = ''
337 337 # Upload-pack == clone
338 338 if git_command == 'git-upload-pack':
339 339 hook_response = hooks.git_pre_pull(self.extras)
340 340 if hook_response.status != 0:
341 341 pre_pull_messages = hook_response.output
342 342 resp.app_iter = self._build_failed_pre_pull_response(
343 343 capabilities, pre_pull_messages)
344 344 return resp
345 345
346 346 gitenv = dict(os.environ)
347 347 # forget all configs
348 348 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
349 349 gitenv['RC_SCM_DATA'] = json.dumps(self.extras)
350 350 cmd = [self.git_path, git_command[4:], '--stateless-rpc',
351 351 self.content_path]
352 352 log.debug('handling cmd %s', cmd)
353 353
354 354 out = subprocessio.SubprocessIOChunker(
355 355 cmd,
356 356 input_stream=inputstream,
357 357 env=gitenv,
358 358 cwd=self.content_path,
359 359 shell=False,
360 360 fail_on_stderr=False,
361 361 fail_on_return_code=False
362 362 )
363 363
364 364 if self.update_server_info and git_command == 'git-receive-pack':
365 365 # We need to fully consume the iterator here, as the
366 366 # update-server-info command needs to be run after the push.
367 367 out = list(out)
368 368
369 369 # Updating refs manually after each push.
370 370 # This is required as some clients are exposing Git repos internally
371 371 # with the dumb protocol.
372 372 cmd = [self.git_path, 'update-server-info']
373 373 log.debug('handling cmd %s', cmd)
374 374 output = subprocessio.SubprocessIOChunker(
375 375 cmd,
376 376 input_stream=inputstream,
377 377 env=gitenv,
378 378 cwd=self.content_path,
379 379 shell=False,
380 380 fail_on_stderr=False,
381 381 fail_on_return_code=False
382 382 )
383 383 # Consume all the output so the subprocess finishes
384 384 for _ in output:
385 385 pass
386 386
387 387 # Upload-pack == clone
388 388 if git_command == 'git-upload-pack':
389 389 hook_response = hooks.git_post_pull(self.extras)
390 390 post_pull_messages = hook_response.output
391 391 resp.app_iter = self._build_post_pull_response(out, capabilities, pre_pull_messages, post_pull_messages)
392 392 else:
393 393 resp.app_iter = out
394 394
395 395 return resp
396 396
397 397 def __call__(self, environ, start_response):
398 398 request = Request(environ)
399 399 _path = self._get_fixedpath(request.path_info)
400 400 if _path.startswith('info/refs'):
401 401 app = self.inforefs
402 402 else:
403 403 app = self.backend
404 404
405 405 try:
406 406 resp = app(request, environ)
407 407 except exc.HTTPException as error:
408 408 log.exception('HTTP Error')
409 409 resp = error
410 410 except Exception:
411 411 log.exception('Unknown error')
412 412 resp = exc.HTTPInternalServerError()
413 413
414 414 return resp(environ, start_response)
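
As a footnote to _write_sideband_to_proto above: the chunk sizes leave room for the pkt-line framing (4-byte length prefix plus 1 sideband byte), i.e. 65520 - 5 and 1000 - 5. A standalone sketch of the chunking itself:

    data = b'x' * 200000
    chunk_size = 65515   # side-band-64k payload limit minus framing
    chunks = [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]
    assert b''.join(chunks) == data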
@@ -1,1382 +1,1382 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import collections
19 19 import logging
20 20 import os
21 21 import posixpath as vcspath
22 22 import re
23 23 import stat
24 24 import traceback
25 25 import urllib.request
26 26 import urllib.parse
27 27 import urllib.error
28 28 from functools import wraps
29 29
30 30 import more_itertools
31 31 import pygit2
32 32 from pygit2 import Repository as LibGit2Repo
33 33 from pygit2 import index as LibGit2Index
34 34 from dulwich import index, objects
35 35 from dulwich.client import HttpGitClient, LocalGitClient
36 36 from dulwich.errors import (
37 37 NotGitRepository, ChecksumMismatch, WrongObjectException,
38 38 MissingCommitError, ObjectMissing, HangupException,
39 39 UnexpectedCommandError)
40 40 from dulwich.repo import Repo as DulwichRepo
41 41 from dulwich.server import update_server_info
42 42
43 43 from vcsserver import exceptions, settings, subprocessio
44 44 from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes
45 45 from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo, BinaryEnvelope
46 46 from vcsserver.hgcompat import (
47 47 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
48 48 from vcsserver.git_lfs.lib import LFSOidStore
49 49 from vcsserver.vcs_base import RemoteBase
50 50
51 51 DIR_STAT = stat.S_IFDIR
52 52 FILE_MODE = stat.S_IFMT
53 53 GIT_LINK = objects.S_IFGITLINK
54 54 PEELED_REF_MARKER = b'^{}'
55 55 HEAD_MARKER = b'HEAD'
56 56
57 57 log = logging.getLogger(__name__)
58 58
59 59
60 60 def reraise_safe_exceptions(func):
61 61 """Converts Dulwich exceptions to something neutral."""
62 62
63 63 @wraps(func)
64 64 def wrapper(*args, **kwargs):
65 65 try:
66 66 return func(*args, **kwargs)
67 67 except (ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing,) as e:
68 68 exc = exceptions.LookupException(org_exc=e)
69 69 raise exc(safe_str(e))
70 70 except (HangupException, UnexpectedCommandError) as e:
71 71 exc = exceptions.VcsException(org_exc=e)
72 72 raise exc(safe_str(e))
73 73 except Exception:
74 74             # NOTE(marcink): because of how dulwich handles some exceptions
75 75             # (KeyError on empty repos), we cannot reliably catch all
76 76             # exceptions here; some originate from other handlers
77 77 #if not hasattr(e, '_vcs_kind'):
78 78 #log.exception("Unhandled exception in git remote call")
79 79 #raise_from_original(exceptions.UnhandledException)
80 80 raise
81 81 return wrapper
82 82
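# A hedged usage sketch (not part of this changeset): any dulwich lookup
# failure inside a decorated remote method surfaces as a vcsserver
# LookupException carrying the original error, per the wrapper above.
#
#     @reraise_safe_exceptions
#     def example_lookup(repo, sha):
#         return repo[sha]  # ObjectMissing etc. is converted and re-raised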
83 83
84 84 class Repo(DulwichRepo):
85 85 """
86 86 A wrapper for dulwich Repo class.
87 87
88 88     Since dulwich sometimes keeps .idx file descriptors open, this leads to
89 89     a "Too many open files" error. We need to close all opened file descriptors
90 90     once the repo object is destroyed.
91 91 """
92 92 def __del__(self):
93 93 if hasattr(self, 'object_store'):
94 94 self.close()
95 95
96 96
97 97 class Repository(LibGit2Repo):
98 98
99 99 def __enter__(self):
100 100 return self
101 101
102 102 def __exit__(self, exc_type, exc_val, exc_tb):
103 103 self.free()
104 104
105 105
106 106 class GitFactory(RepoFactory):
107 107 repo_type = 'git'
108 108
109 109 def _create_repo(self, wire, create, use_libgit2=False):
110 110 if use_libgit2:
111 111 repo = Repository(safe_bytes(wire['path']))
112 112 else:
113 113 # dulwich mode
114 114 repo_path = safe_str(wire['path'], to_encoding=settings.WIRE_ENCODING)
115 115 repo = Repo(repo_path)
116 116
117 117 log.debug('repository created: got GIT object: %s', repo)
118 118 return repo
119 119
120 120 def repo(self, wire, create=False, use_libgit2=False):
121 121 """
122 122 Get a repository instance for the given path.
123 123 """
124 124 return self._create_repo(wire, create, use_libgit2)
125 125
126 126 def repo_libgit2(self, wire):
127 127 return self.repo(wire, use_libgit2=True)
128 128
129 129
130 130 class GitRemote(RemoteBase):
131 131
132 132 def __init__(self, factory):
133 133 self._factory = factory
134 134 self._bulk_methods = {
135 135 "date": self.date,
136 136 "author": self.author,
137 137 "branch": self.branch,
138 138 "message": self.message,
139 139 "parents": self.parents,
140 140 "_commit": self.revision,
141 141 }
142 142
143 143 def _wire_to_config(self, wire):
144 144 if 'config' in wire:
145 return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
145 return {x[0] + '_' + x[1]: x[2] for x in wire['config']}
146 146 return {}
147 147
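# Illustration of the key flattening performed above; the config triple
# below is invented for the example:
#
#     wire = {'config': [('vcs_git_lfs', 'store_location', '/var/lfs-store')]}
#     _wire_to_config(wire) == {'vcs_git_lfs_store_location': '/var/lfs-store'}
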
148 148 def _remote_conf(self, config):
149 149 params = [
150 150 '-c', 'core.askpass=""',
151 151 ]
152 152 ssl_cert_dir = config.get('vcs_ssl_dir')
153 153 if ssl_cert_dir:
154 params.extend(['-c', 'http.sslCAinfo={}'.format(ssl_cert_dir)])
154 params.extend(['-c', f'http.sslCAinfo={ssl_cert_dir}'])
155 155 return params
156 156
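# Example of the `git -c` options produced above (cert dir is illustrative):
#
#     _remote_conf({'vcs_ssl_dir': '/etc/ssl/certs'})
#     == ['-c', 'core.askpass=""', '-c', 'http.sslCAinfo=/etc/ssl/certs']
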
157 157 @reraise_safe_exceptions
158 158 def discover_git_version(self):
159 159 stdout, _ = self.run_git_command(
160 160 {}, ['--version'], _bare=True, _safe=True)
161 161 prefix = b'git version'
162 162 if stdout.startswith(prefix):
163 163 stdout = stdout[len(prefix):]
164 164 return safe_str(stdout.strip())
165 165
166 166 @reraise_safe_exceptions
167 167 def is_empty(self, wire):
168 168 repo_init = self._factory.repo_libgit2(wire)
169 169 with repo_init as repo:
170 170
171 171 try:
172 172 has_head = repo.head.name
173 173 if has_head:
174 174 return False
175 175
176 176 # NOTE(marcink): check again using more expensive method
177 177 return repo.is_empty
178 178 except Exception:
179 179 pass
180 180
181 181 return True
182 182
183 183 @reraise_safe_exceptions
184 184 def assert_correct_path(self, wire):
185 185 cache_on, context_uid, repo_id = self._cache_on(wire)
186 186 region = self._region(wire)
187 187
188 188 @region.conditional_cache_on_arguments(condition=cache_on)
189 189 def _assert_correct_path(_context_uid, _repo_id, fast_check):
190 190 if fast_check:
191 191 path = safe_str(wire['path'])
192 192 if pygit2.discover_repository(path):
193 193 return True
194 194 return False
195 195 else:
196 196 try:
197 197 repo_init = self._factory.repo_libgit2(wire)
198 198 with repo_init:
199 199 pass
200 200 except pygit2.GitError:
201 201 path = wire.get('path')
202 202 tb = traceback.format_exc()
203 203 log.debug("Invalid Git path `%s`, tb: %s", path, tb)
204 204 return False
205 205 return True
206 206
207 207 return _assert_correct_path(context_uid, repo_id, True)
208 208
209 209 @reraise_safe_exceptions
210 210 def bare(self, wire):
211 211 repo_init = self._factory.repo_libgit2(wire)
212 212 with repo_init as repo:
213 213 return repo.is_bare
214 214
215 215 @reraise_safe_exceptions
216 216 def blob_as_pretty_string(self, wire, sha):
217 217 repo_init = self._factory.repo_libgit2(wire)
218 218 with repo_init as repo:
219 219 blob_obj = repo[sha]
220 220 return BinaryEnvelope(blob_obj.data)
221 221
222 222 @reraise_safe_exceptions
223 223 def blob_raw_length(self, wire, sha):
224 224 cache_on, context_uid, repo_id = self._cache_on(wire)
225 225 region = self._region(wire)
226 226
227 227 @region.conditional_cache_on_arguments(condition=cache_on)
228 228 def _blob_raw_length(_repo_id, _sha):
229 229
230 230 repo_init = self._factory.repo_libgit2(wire)
231 231 with repo_init as repo:
232 232 blob = repo[sha]
233 233 return blob.size
234 234
235 235 return _blob_raw_length(repo_id, sha)
236 236
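# The caching idiom repeated throughout this class, shown in isolation:
# `region` is a dogpile.cache region, `cache_on` a per-call boolean, and the
# leading-underscore arguments exist only to shape the cache key. This is a
# sketch of the pattern, not a new API:
#
#     @region.conditional_cache_on_arguments(condition=cache_on)
#     def _compute(_repo_id, _sha):
#         ...  # expensive libgit2 work, cached only when cache_on is True
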
237 237 def _parse_lfs_pointer(self, raw_content):
238 238 spec_string = b'version https://git-lfs.github.com/spec'
239 239 if raw_content and raw_content.startswith(spec_string):
240 240
241 241 pattern = re.compile(rb"""
242 242 (?:\n)?
243 243 ^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
244 244 ^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
245 245 ^size[ ](?P<oid_size>[0-9]+)\n
246 246 (?:\n)?
247 247 """, re.VERBOSE | re.MULTILINE)
248 248 match = pattern.match(raw_content)
249 249 if match:
250 250 return match.groupdict()
251 251
252 252 return {}
253 253
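# Usage sketch for the parser above with a spec-conformant pointer; the
# oid is fabricated for the example:
#
#     raw = (b"version https://git-lfs.github.com/spec/v1\n"
#            b"oid sha256:" + b"a" * 64 + b"\n"
#            b"size 12345\n")
#     self._parse_lfs_pointer(raw)
#     == {'spec_ver': b'v1', 'oid_hash': b'a' * 64, 'oid_size': b'12345'}
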
254 254 @reraise_safe_exceptions
255 255 def is_large_file(self, wire, commit_id):
256 256 cache_on, context_uid, repo_id = self._cache_on(wire)
257 257 region = self._region(wire)
258 258
259 259 @region.conditional_cache_on_arguments(condition=cache_on)
260 260 def _is_large_file(_repo_id, _sha):
261 261 repo_init = self._factory.repo_libgit2(wire)
262 262 with repo_init as repo:
263 263 blob = repo[commit_id]
264 264 if blob.is_binary:
265 265 return {}
266 266
267 267 return self._parse_lfs_pointer(blob.data)
268 268
269 269 return _is_large_file(repo_id, commit_id)
270 270
271 271 @reraise_safe_exceptions
272 272 def is_binary(self, wire, tree_id):
273 273 cache_on, context_uid, repo_id = self._cache_on(wire)
274 274 region = self._region(wire)
275 275
276 276 @region.conditional_cache_on_arguments(condition=cache_on)
277 277 def _is_binary(_repo_id, _tree_id):
278 278 repo_init = self._factory.repo_libgit2(wire)
279 279 with repo_init as repo:
280 280 blob_obj = repo[tree_id]
281 281 return blob_obj.is_binary
282 282
283 283 return _is_binary(repo_id, tree_id)
284 284
285 285 @reraise_safe_exceptions
286 286 def md5_hash(self, wire, tree_id):
287 287 cache_on, context_uid, repo_id = self._cache_on(wire)
288 288 region = self._region(wire)
289 289
290 290 @region.conditional_cache_on_arguments(condition=cache_on)
291 291 def _md5_hash(_repo_id, _tree_id):
292 292 return ''
293 293
294 294 return _md5_hash(repo_id, tree_id)
295 295
296 296 @reraise_safe_exceptions
297 297 def in_largefiles_store(self, wire, oid):
298 298 conf = self._wire_to_config(wire)
299 299 repo_init = self._factory.repo_libgit2(wire)
300 300 with repo_init as repo:
301 301 repo_name = repo.path
302 302
303 303 store_location = conf.get('vcs_git_lfs_store_location')
304 304 if store_location:
305 305
306 306 store = LFSOidStore(
307 307 oid=oid, repo=repo_name, store_location=store_location)
308 308 return store.has_oid()
309 309
310 310 return False
311 311
312 312 @reraise_safe_exceptions
313 313 def store_path(self, wire, oid):
314 314 conf = self._wire_to_config(wire)
315 315 repo_init = self._factory.repo_libgit2(wire)
316 316 with repo_init as repo:
317 317 repo_name = repo.path
318 318
319 319 store_location = conf.get('vcs_git_lfs_store_location')
320 320 if store_location:
321 321 store = LFSOidStore(
322 322 oid=oid, repo=repo_name, store_location=store_location)
323 323 return store.oid_path
324 raise ValueError('Unable to fetch oid with path {}'.format(oid))
324 raise ValueError(f'Unable to fetch oid with path {oid}')
325 325
326 326 @reraise_safe_exceptions
327 327 def bulk_request(self, wire, rev, pre_load):
328 328 cache_on, context_uid, repo_id = self._cache_on(wire)
329 329 region = self._region(wire)
330 330
331 331 @region.conditional_cache_on_arguments(condition=cache_on)
332 332 def _bulk_request(_repo_id, _rev, _pre_load):
333 333 result = {}
334 334 for attr in pre_load:
335 335 try:
336 336 method = self._bulk_methods[attr]
337 337 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
338 338 args = [wire, rev]
339 339 result[attr] = method(*args)
340 340 except KeyError as e:
341 341 raise exceptions.VcsException(e)(f"Unknown bulk attribute: {attr}")
342 342 return result
343 343
344 344 return _bulk_request(repo_id, rev, sorted(pre_load))
345 345
346 346 def _build_opener(self, url):
347 347 handlers = []
348 348 url_obj = url_parser(url)
349 349 _, authinfo = url_obj.authinfo()
350 350
351 351 if authinfo:
352 352 # create a password manager
353 353 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
354 354 passmgr.add_password(*authinfo)
355 355
356 356 handlers.extend((httpbasicauthhandler(passmgr),
357 357 httpdigestauthhandler(passmgr)))
358 358
359 359 return urllib.request.build_opener(*handlers)
360 360
361 361 def _type_id_to_name(self, type_id: int):
362 362 return {
363 363 1: 'commit',
364 364 2: 'tree',
365 365 3: 'blob',
366 366 4: 'tag'
367 367 }[type_id]
368 368
369 369 @reraise_safe_exceptions
370 370 def check_url(self, url, config):
371 371 url_obj = url_parser(safe_bytes(url))
372 372 test_uri, _ = url_obj.authinfo()
373 373 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
374 374 url_obj.query = obfuscate_qs(url_obj.query)
375 375 cleaned_uri = str(url_obj)
376 376 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
377 377
378 378 if not test_uri.endswith('info/refs'):
379 379 test_uri = test_uri.rstrip('/') + '/info/refs'
380 380
381 381 o = self._build_opener(url)
382 382 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
383 383
384 384 q = {"service": 'git-upload-pack'}
385 385 qs = '?%s' % urllib.parse.urlencode(q)
386 cu = "%s%s" % (test_uri, qs)
386 cu = "{}{}".format(test_uri, qs)
387 387 req = urllib.request.Request(cu, None, {})
388 388
389 389 try:
390 390 log.debug("Trying to open URL %s", cleaned_uri)
391 391 resp = o.open(req)
392 392 if resp.code != 200:
393 393 raise exceptions.URLError()('Return Code is not 200')
394 394 except Exception as e:
395 395 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
396 396 # means it cannot be cloned
397 raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
397 raise exceptions.URLError(e)("[{}] org_exc: {}".format(cleaned_uri, e))
398 398
399 399         # now detect if it's a proper git repo
400 400         gitdata = resp.read()
401 401         if b'service=git-upload-pack' in gitdata:
402 402             pass
403 403         elif re.findall(rb'[0-9a-fA-F]{40}\s+refs', gitdata):
404 404             # old-style git can return some other format!
405 405             pass
406 406         else:
407 407             raise exceptions.URLError()(
408             "url [%s] does not look like an git" % (cleaned_uri,))
408             "url [{}] does not look like a git repository".format(cleaned_uri))
409 409
410 410 return True
411 411
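# Hedged example (URL invented): check_url probes
# <url>/info/refs?service=git-upload-pack and raises URLError unless the
# endpoint answers like a git smart (or old dumb) server:
#
#     remote.check_url('https://code.example.com/repo.git', config={})  # True
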
412 412 @reraise_safe_exceptions
413 413 def clone(self, wire, url, deferred, valid_refs, update_after_clone):
414 414 # TODO(marcink): deprecate this method. Last i checked we don't use it anymore
415 415 remote_refs = self.pull(wire, url, apply_refs=False)
416 416 repo = self._factory.repo(wire)
417 417 if isinstance(valid_refs, list):
418 418 valid_refs = tuple(valid_refs)
419 419
420 420 for k in remote_refs:
421 421             # only parse heads/tags and skip so-called deferred tags
422 422 if k.startswith(valid_refs) and not k.endswith(deferred):
423 423 repo[k] = remote_refs[k]
424 424
425 425 if update_after_clone:
426 426 # we want to checkout HEAD
427 427 repo["HEAD"] = remote_refs["HEAD"]
428 428 index.build_index_from_tree(repo.path, repo.index_path(),
429 429 repo.object_store, repo["HEAD"].tree)
430 430
431 431 @reraise_safe_exceptions
432 432 def branch(self, wire, commit_id):
433 433 cache_on, context_uid, repo_id = self._cache_on(wire)
434 434 region = self._region(wire)
435 435
436 436 @region.conditional_cache_on_arguments(condition=cache_on)
437 437 def _branch(_context_uid, _repo_id, _commit_id):
438 438 regex = re.compile('^refs/heads')
439 439
440 440 def filter_with(ref):
441 441 return regex.match(ref[0]) and ref[1] == _commit_id
442 442
443 443 branches = list(filter(filter_with, list(self.get_refs(wire).items())))
444 444 return [x[0].split('refs/heads/')[-1] for x in branches]
445 445
446 446 return _branch(context_uid, repo_id, commit_id)
447 447
448 448 @reraise_safe_exceptions
449 449 def commit_branches(self, wire, commit_id):
450 450 cache_on, context_uid, repo_id = self._cache_on(wire)
451 451 region = self._region(wire)
452 452
453 453 @region.conditional_cache_on_arguments(condition=cache_on)
454 454 def _commit_branches(_context_uid, _repo_id, _commit_id):
455 455 repo_init = self._factory.repo_libgit2(wire)
456 456 with repo_init as repo:
457 457 branches = [x for x in repo.branches.with_commit(_commit_id)]
458 458 return branches
459 459
460 460 return _commit_branches(context_uid, repo_id, commit_id)
461 461
462 462 @reraise_safe_exceptions
463 463 def add_object(self, wire, content):
464 464 repo_init = self._factory.repo_libgit2(wire)
465 465 with repo_init as repo:
466 466 blob = objects.Blob()
467 467 blob.set_raw_string(content)
468 468 repo.object_store.add_object(blob)
469 469 return blob.id
470 470
471 471 # TODO: this is quite complex, check if that can be simplified
472 472 @reraise_safe_exceptions
473 473 def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
474 474 # Defines the root tree
475 475 class _Root(object):
476 476 def __repr__(self):
477 477 return 'ROOT TREE'
478 478 ROOT = _Root()
479 479
480 480 repo = self._factory.repo(wire)
481 481 object_store = repo.object_store
482 482
483 483 # Create tree and populates it with blobs
484 484 if commit_tree:
485 485 commit_tree = safe_bytes(commit_tree)
486 486
487 487 if commit_tree and repo[commit_tree]:
488 488 git_commit = repo[safe_bytes(commit_data['parents'][0])]
489 489 commit_tree = repo[git_commit.tree] # root tree
490 490 else:
491 491 commit_tree = objects.Tree()
492 492
493 493 for node in updated:
494 494 # Compute subdirs if needed
495 495 dirpath, nodename = vcspath.split(node['path'])
496 496 dirnames = list(map(safe_str, dirpath and dirpath.split('/') or []))
497 497 parent = commit_tree
498 498 ancestors = [('', parent)]
499 499
500 500 # Tries to dig for the deepest existing tree
501 501 while dirnames:
502 502 curdir = dirnames.pop(0)
503 503 try:
504 504 dir_id = parent[curdir][1]
505 505 except KeyError:
506 506                     # put curdir back into dirnames and stop
507 507 dirnames.insert(0, curdir)
508 508 break
509 509 else:
510 510 # If found, updates parent
511 511 parent = repo[dir_id]
512 512 ancestors.append((curdir, parent))
513 513 # Now parent is deepest existing tree and we need to create
514 514 # subtrees for dirnames (in reverse order)
515 515 # [this only applies for nodes from added]
516 516 new_trees = []
517 517
518 518 blob = objects.Blob.from_string(node['content'])
519 519
520 520 node_path = safe_bytes(node['node_path'])
521 521
522 522 if dirnames:
523 523 # If there are trees which should be created we need to build
524 524 # them now (in reverse order)
525 525 reversed_dirnames = list(reversed(dirnames))
526 526 curtree = objects.Tree()
527 527 curtree[node_path] = node['mode'], blob.id
528 528 new_trees.append(curtree)
529 529 for dirname in reversed_dirnames[:-1]:
530 530 newtree = objects.Tree()
531 531 newtree[dirname] = (DIR_STAT, curtree.id)
532 532 new_trees.append(newtree)
533 533 curtree = newtree
534 534 parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
535 535 else:
536 536 parent.add(name=node_path, mode=node['mode'], hexsha=blob.id)
537 537
538 538 new_trees.append(parent)
539 539 # Update ancestors
540 540 reversed_ancestors = reversed(
541 541 [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
542 542 for parent, tree, path in reversed_ancestors:
543 543 parent[path] = (DIR_STAT, tree.id)
544 544 object_store.add_object(tree)
545 545
546 546 object_store.add_object(blob)
547 547 for tree in new_trees:
548 548 object_store.add_object(tree)
549 549
550 550 for node_path in removed:
551 551 paths = node_path.split('/')
552 552 tree = commit_tree # start with top-level
553 553 trees = [{'tree': tree, 'path': ROOT}]
554 554 # Traverse deep into the forest...
555 555 # resolve final tree by iterating the path.
556 556 # e.g a/b/c.txt will get
557 557 # - root as tree then
558 558 # - 'a' as tree,
559 559 # - 'b' as tree,
560 560 # - stop at c as blob.
561 561 for path in paths:
562 562 try:
563 563 obj = repo[tree[path][1]]
564 564 if isinstance(obj, objects.Tree):
565 565 trees.append({'tree': obj, 'path': path})
566 566 tree = obj
567 567 except KeyError:
568 568 break
569 569             # PROBLEM:
570 570             """
571 571             We're not editing the same reference tree object
572 572             """
573 573 # Cut down the blob and all rotten trees on the way back...
574 574 for path, tree_data in reversed(list(zip(paths, trees))):
575 575 tree = tree_data['tree']
576 576 tree.__delitem__(path)
577 577 # This operation edits the tree, we need to mark new commit back
578 578
579 579 if len(tree) > 0:
580 580 # This tree still has elements - don't remove it or any
581 581                     # of its parents
582 582 break
583 583
584 584 object_store.add_object(commit_tree)
585 585
586 586 # Create commit
587 587 commit = objects.Commit()
588 588 commit.tree = commit_tree.id
589 589 bytes_keys = [
590 590 'author',
591 591 'committer',
592 592 'message',
593 593 'encoding',
594 594 'parents'
595 595 ]
596 596
597 597 for k, v in commit_data.items():
598 598 if k in bytes_keys:
599 599 if k == 'parents':
600 600 v = [safe_bytes(x) for x in v]
601 601 else:
602 602 v = safe_bytes(v)
603 603 setattr(commit, k, v)
604 604
605 605 object_store.add_object(commit)
606 606
607 607 self.create_branch(wire, branch, safe_str(commit.id))
608 608
609 609 # dulwich set-ref
610 610 repo.refs[safe_bytes(f'refs/heads/{branch}')] = commit.id
611 611
612 612 return commit.id
613 613
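# A minimal standalone sketch of the dulwich blob/tree plumbing used above;
# names and content are illustrative, Tree.add(name, mode, sha) is dulwich API:
#
#     from dulwich import objects
#     blob = objects.Blob.from_string(b'print("hi")\n')
#     tree = objects.Tree()
#     tree.add(b'hello.py', 0o100644, blob.id)
#     # both objects must then be persisted via object_store.add_object(...)
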
614 614 @reraise_safe_exceptions
615 615 def pull(self, wire, url, apply_refs=True, refs=None, update_after=False):
616 616 if url != 'default' and '://' not in url:
617 617 client = LocalGitClient(url)
618 618 else:
619 619 url_obj = url_parser(url)
620 620 o = self._build_opener(url)
621 621 url, _ = url_obj.authinfo()
622 622 client = HttpGitClient(base_url=url, opener=o)
623 623 repo = self._factory.repo(wire)
624 624
625 625 determine_wants = repo.object_store.determine_wants_all
626 626 if refs:
627 627 refs = [ascii_bytes(x) for x in refs]
628 628
629 629 def determine_wants_requested(remote_refs):
630 630 determined = []
631 631 for ref_name, ref_hash in remote_refs.items():
632 632 bytes_ref_name = safe_bytes(ref_name)
633 633
634 634 if bytes_ref_name in refs:
635 635 bytes_ref_hash = safe_bytes(ref_hash)
636 636 determined.append(bytes_ref_hash)
637 637 return determined
638 638
639 639 # swap with our custom requested wants
640 640 determine_wants = determine_wants_requested
641 641
642 642 try:
643 643 remote_refs = client.fetch(
644 644 path=url, target=repo, determine_wants=determine_wants)
645 645
646 646 except NotGitRepository as e:
647 647 log.warning(
648 648 'Trying to fetch from "%s" failed, not a Git repository.', url)
649 649             # the exception can contain unicode, which we convert via repr()
650 650 raise exceptions.AbortException(e)(repr(e))
651 651
652 652 # mikhail: client.fetch() returns all the remote refs, but fetches only
653 653         # refs filtered by the `determine_wants` function. We need to filter the
654 654         # result as well
655 655 if refs:
656 656 remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
657 657
658 658 if apply_refs:
659 659 # TODO: johbo: Needs proper test coverage with a git repository
660 660 # that contains a tag object, so that we would end up with
661 661 # a peeled ref at this point.
662 662 for k in remote_refs:
663 663 if k.endswith(PEELED_REF_MARKER):
664 664 log.debug("Skipping peeled reference %s", k)
665 665 continue
666 666 repo[k] = remote_refs[k]
667 667
668 668 if refs and not update_after:
669 669 # mikhail: explicitly set the head to the last ref.
670 670 repo[HEAD_MARKER] = remote_refs[refs[-1]]
671 671
672 672 if update_after:
673 673 # we want to check out HEAD
674 674 repo[HEAD_MARKER] = remote_refs[HEAD_MARKER]
675 675 index.build_index_from_tree(repo.path, repo.index_path(),
676 676 repo.object_store, repo[HEAD_MARKER].tree)
677 677 return remote_refs
678 678
679 679 @reraise_safe_exceptions
680 680 def sync_fetch(self, wire, url, refs=None, all_refs=False):
681 681 self._factory.repo(wire)
682 682 if refs and not isinstance(refs, (list, tuple)):
683 683 refs = [refs]
684 684
685 685 config = self._wire_to_config(wire)
686 686 # get all remote refs we'll use to fetch later
687 687 cmd = ['ls-remote']
688 688 if not all_refs:
689 689 cmd += ['--heads', '--tags']
690 690 cmd += [url]
691 691 output, __ = self.run_git_command(
692 692 wire, cmd, fail_on_stderr=False,
693 693 _copts=self._remote_conf(config),
694 694 extra_env={'GIT_TERMINAL_PROMPT': '0'})
695 695
696 696 remote_refs = collections.OrderedDict()
697 697 fetch_refs = []
698 698
699 699 for ref_line in output.splitlines():
700 700 sha, ref = ref_line.split(b'\t')
701 701 sha = sha.strip()
702 702 if ref in remote_refs:
703 703 # duplicate, skip
704 704 continue
705 705 if ref.endswith(PEELED_REF_MARKER):
706 706 log.debug("Skipping peeled reference %s", ref)
707 707 continue
708 708 # don't sync HEAD
709 709 if ref in [HEAD_MARKER]:
710 710 continue
711 711
712 712 remote_refs[ref] = sha
713 713
714 714 if refs and sha in refs:
715 715 # we filter fetch using our specified refs
716 716 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
717 717 elif not refs:
718 718 fetch_refs.append(f'{safe_str(ref)}:{safe_str(ref)}')
719 719 log.debug('Finished obtaining fetch refs, total: %s', len(fetch_refs))
720 720
721 721 if fetch_refs:
722 722 for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
723 723 fetch_refs_chunks = list(chunk)
724 724 log.debug('Fetching %s refs from import url', len(fetch_refs_chunks))
725 725 self.run_git_command(
726 726 wire, ['fetch', url, '--force', '--prune', '--'] + fetch_refs_chunks,
727 727 fail_on_stderr=False,
728 728 _copts=self._remote_conf(config),
729 729 extra_env={'GIT_TERMINAL_PROMPT': '0'})
730 730
731 731 return remote_refs
732 732
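# The chunked fetch above, in isolation (batch size as used there): refspecs
# are passed to `git fetch` in batches of 4096 to keep the command line bounded.
#
#     for chunk in more_itertools.chunked(fetch_refs, 1024 * 4):
#         ...  # one `git fetch <url> --force --prune -- <chunk>` per batch
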
733 733 @reraise_safe_exceptions
734 734 def sync_push(self, wire, url, refs=None):
735 735 if not self.check_url(url, wire):
736 736 return
737 737 config = self._wire_to_config(wire)
738 738 self._factory.repo(wire)
739 739 self.run_git_command(
740 740 wire, ['push', url, '--mirror'], fail_on_stderr=False,
741 741 _copts=self._remote_conf(config),
742 742 extra_env={'GIT_TERMINAL_PROMPT': '0'})
743 743
744 744 @reraise_safe_exceptions
745 745 def get_remote_refs(self, wire, url):
746 746 repo = Repo(url)
747 747 return repo.get_refs()
748 748
749 749 @reraise_safe_exceptions
750 750 def get_description(self, wire):
751 751 repo = self._factory.repo(wire)
752 752 return repo.get_description()
753 753
754 754 @reraise_safe_exceptions
755 755 def get_missing_revs(self, wire, rev1, rev2, path2):
756 756 repo = self._factory.repo(wire)
757 757 LocalGitClient(thin_packs=False).fetch(path2, repo)
758 758
759 759 wire_remote = wire.copy()
760 760 wire_remote['path'] = path2
761 761 repo_remote = self._factory.repo(wire_remote)
762 762 LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
763 763
764 764 revs = [
765 765 x.commit.id
766 766 for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
767 767 return revs
768 768
769 769 @reraise_safe_exceptions
770 770 def get_object(self, wire, sha, maybe_unreachable=False):
771 771 cache_on, context_uid, repo_id = self._cache_on(wire)
772 772 region = self._region(wire)
773 773
774 774 @region.conditional_cache_on_arguments(condition=cache_on)
775 775 def _get_object(_context_uid, _repo_id, _sha):
776 776 repo_init = self._factory.repo_libgit2(wire)
777 777 with repo_init as repo:
778 778
779 779 missing_commit_err = 'Commit {} does not exist for `{}`'.format(sha, wire['path'])
780 780 try:
781 781 commit = repo.revparse_single(sha)
782 782 except KeyError:
783 783 # NOTE(marcink): KeyError doesn't give us any meaningful information
784 784 # here, we instead give something more explicit
785 785 e = exceptions.RefNotFoundException('SHA: %s not found', sha)
786 786 raise exceptions.LookupException(e)(missing_commit_err)
787 787 except ValueError as e:
788 788 raise exceptions.LookupException(e)(missing_commit_err)
789 789
790 790 is_tag = False
791 791 if isinstance(commit, pygit2.Tag):
792 792 commit = repo.get(commit.target)
793 793 is_tag = True
794 794
795 795 check_dangling = True
796 796 if is_tag:
797 797 check_dangling = False
798 798
799 799 if check_dangling and maybe_unreachable:
800 800 check_dangling = False
801 801
802 802                 # if we used a reference and it parsed, we don't have a dangling commit
803 803 if sha != commit.hex:
804 804 check_dangling = False
805 805
806 806 if check_dangling:
807 807 # check for dangling commit
808 808 for branch in repo.branches.with_commit(commit.hex):
809 809 if branch:
810 810 break
811 811 else:
812 812 # NOTE(marcink): Empty error doesn't give us any meaningful information
813 813 # here, we instead give something more explicit
814 814 e = exceptions.RefNotFoundException('SHA: %s not found in branches', sha)
815 815 raise exceptions.LookupException(e)(missing_commit_err)
816 816
817 817 commit_id = commit.hex
818 818 type_id = commit.type
819 819
820 820 return {
821 821 'id': commit_id,
822 822 'type': self._type_id_to_name(type_id),
823 823 'commit_id': commit_id,
824 824 'idx': 0
825 825 }
826 826
827 827 return _get_object(context_uid, repo_id, sha)
828 828
829 829 @reraise_safe_exceptions
830 830 def get_refs(self, wire):
831 831 cache_on, context_uid, repo_id = self._cache_on(wire)
832 832 region = self._region(wire)
833 833
834 834 @region.conditional_cache_on_arguments(condition=cache_on)
835 835 def _get_refs(_context_uid, _repo_id):
836 836
837 837 repo_init = self._factory.repo_libgit2(wire)
838 838 with repo_init as repo:
839 839 regex = re.compile('^refs/(heads|tags)/')
840 840 return {x.name: x.target.hex for x in
841 841 [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]}
842 842
843 843 return _get_refs(context_uid, repo_id)
844 844
845 845 @reraise_safe_exceptions
846 846 def get_branch_pointers(self, wire):
847 847 cache_on, context_uid, repo_id = self._cache_on(wire)
848 848 region = self._region(wire)
849 849
850 850 @region.conditional_cache_on_arguments(condition=cache_on)
851 851 def _get_branch_pointers(_context_uid, _repo_id):
852 852
853 853 repo_init = self._factory.repo_libgit2(wire)
854 854 regex = re.compile('^refs/heads')
855 855 with repo_init as repo:
856 856 branches = [ref for ref in repo.listall_reference_objects() if regex.match(ref.name)]
857 857 return {x.target.hex: x.shorthand for x in branches}
858 858
859 859 return _get_branch_pointers(context_uid, repo_id)
860 860
861 861 @reraise_safe_exceptions
862 862 def head(self, wire, show_exc=True):
863 863 cache_on, context_uid, repo_id = self._cache_on(wire)
864 864 region = self._region(wire)
865 865
866 866 @region.conditional_cache_on_arguments(condition=cache_on)
867 867 def _head(_context_uid, _repo_id, _show_exc):
868 868 repo_init = self._factory.repo_libgit2(wire)
869 869 with repo_init as repo:
870 870 try:
871 871 return repo.head.peel().hex
872 872 except Exception:
873 873 if show_exc:
874 874 raise
875 875 return _head(context_uid, repo_id, show_exc)
876 876
877 877 @reraise_safe_exceptions
878 878 def init(self, wire):
879 879 repo_path = safe_str(wire['path'])
880 880 self.repo = Repo.init(repo_path)
881 881
882 882 @reraise_safe_exceptions
883 883 def init_bare(self, wire):
884 884 repo_path = safe_str(wire['path'])
885 885 self.repo = Repo.init_bare(repo_path)
886 886
887 887 @reraise_safe_exceptions
888 888 def revision(self, wire, rev):
889 889
890 890 cache_on, context_uid, repo_id = self._cache_on(wire)
891 891 region = self._region(wire)
892 892
893 893 @region.conditional_cache_on_arguments(condition=cache_on)
894 894 def _revision(_context_uid, _repo_id, _rev):
895 895 repo_init = self._factory.repo_libgit2(wire)
896 896 with repo_init as repo:
897 897 commit = repo[rev]
898 898 obj_data = {
899 899 'id': commit.id.hex,
900 900 }
901 901                 # tree objects themselves don't have a tree_id attribute
902 902 if hasattr(commit, 'tree_id'):
903 903 obj_data['tree'] = commit.tree_id.hex
904 904
905 905 return obj_data
906 906 return _revision(context_uid, repo_id, rev)
907 907
908 908 @reraise_safe_exceptions
909 909 def date(self, wire, commit_id):
910 910 cache_on, context_uid, repo_id = self._cache_on(wire)
911 911 region = self._region(wire)
912 912
913 913 @region.conditional_cache_on_arguments(condition=cache_on)
914 914 def _date(_repo_id, _commit_id):
915 915 repo_init = self._factory.repo_libgit2(wire)
916 916 with repo_init as repo:
917 917 commit = repo[commit_id]
918 918
919 919 if hasattr(commit, 'commit_time'):
920 920 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
921 921 else:
922 922 commit = commit.get_object()
923 923 commit_time, commit_time_offset = commit.commit_time, commit.commit_time_offset
924 924
925 925 # TODO(marcink): check dulwich difference of offset vs timezone
926 926 return [commit_time, commit_time_offset]
927 927 return _date(repo_id, commit_id)
928 928
929 929 @reraise_safe_exceptions
930 930 def author(self, wire, commit_id):
931 931 cache_on, context_uid, repo_id = self._cache_on(wire)
932 932 region = self._region(wire)
933 933
934 934 @region.conditional_cache_on_arguments(condition=cache_on)
935 935 def _author(_repo_id, _commit_id):
936 936 repo_init = self._factory.repo_libgit2(wire)
937 937 with repo_init as repo:
938 938 commit = repo[commit_id]
939 939
940 940 if hasattr(commit, 'author'):
941 941 author = commit.author
942 942 else:
943 943 author = commit.get_object().author
944 944
945 945 if author.email:
946 return "{} <{}>".format(author.name, author.email)
946 return f"{author.name} <{author.email}>"
947 947
948 948 try:
949 return "{}".format(author.name)
949 return f"{author.name}"
950 950 except Exception:
951 return "{}".format(safe_str(author.raw_name))
951 return f"{safe_str(author.raw_name)}"
952 952
953 953 return _author(repo_id, commit_id)
954 954
955 955 @reraise_safe_exceptions
956 956 def message(self, wire, commit_id):
957 957 cache_on, context_uid, repo_id = self._cache_on(wire)
958 958 region = self._region(wire)
959 959
960 960 @region.conditional_cache_on_arguments(condition=cache_on)
961 961 def _message(_repo_id, _commit_id):
962 962 repo_init = self._factory.repo_libgit2(wire)
963 963 with repo_init as repo:
964 964 commit = repo[commit_id]
965 965 return commit.message
966 966 return _message(repo_id, commit_id)
967 967
968 968 @reraise_safe_exceptions
969 969 def parents(self, wire, commit_id):
970 970 cache_on, context_uid, repo_id = self._cache_on(wire)
971 971 region = self._region(wire)
972 972
973 973 @region.conditional_cache_on_arguments(condition=cache_on)
974 974 def _parents(_repo_id, _commit_id):
975 975 repo_init = self._factory.repo_libgit2(wire)
976 976 with repo_init as repo:
977 977 commit = repo[commit_id]
978 978 if hasattr(commit, 'parent_ids'):
979 979 parent_ids = commit.parent_ids
980 980 else:
981 981 parent_ids = commit.get_object().parent_ids
982 982
983 983 return [x.hex for x in parent_ids]
984 984 return _parents(repo_id, commit_id)
985 985
986 986 @reraise_safe_exceptions
987 987 def children(self, wire, commit_id):
988 988 cache_on, context_uid, repo_id = self._cache_on(wire)
989 989 region = self._region(wire)
990 990
991 991 head = self.head(wire)
992 992
993 993 @region.conditional_cache_on_arguments(condition=cache_on)
994 994 def _children(_repo_id, _commit_id):
995 995
996 996 output, __ = self.run_git_command(
997 997 wire, ['rev-list', '--all', '--children', f'{commit_id}^..{head}'])
998 998
999 999 child_ids = []
1000 pat = re.compile(r'^{}'.format(commit_id))
1000 pat = re.compile(fr'^{commit_id}')
1001 1001 for line in output.splitlines():
1002 1002 line = safe_str(line)
1003 1003 if pat.match(line):
1004 1004 found_ids = line.split(' ')[1:]
1005 1005 child_ids.extend(found_ids)
1006 1006 break
1007 1007
1008 1008 return child_ids
1009 1009 return _children(repo_id, commit_id)
1010 1010
1011 1011 @reraise_safe_exceptions
1012 1012 def set_refs(self, wire, key, value):
1013 1013 repo_init = self._factory.repo_libgit2(wire)
1014 1014 with repo_init as repo:
1015 1015 repo.references.create(key, value, force=True)
1016 1016
1017 1017 @reraise_safe_exceptions
1018 1018 def create_branch(self, wire, branch_name, commit_id, force=False):
1019 1019 repo_init = self._factory.repo_libgit2(wire)
1020 1020 with repo_init as repo:
1021 1021 commit = repo[commit_id]
1022 1022
1023 1023 if force:
1024 1024 repo.branches.local.create(branch_name, commit, force=force)
1025 1025 elif not repo.branches.get(branch_name):
1026 1026                 # create only if that branch doesn't already exist
1027 1027 repo.branches.local.create(branch_name, commit, force=force)
1028 1028
1029 1029 @reraise_safe_exceptions
1030 1030 def remove_ref(self, wire, key):
1031 1031 repo_init = self._factory.repo_libgit2(wire)
1032 1032 with repo_init as repo:
1033 1033 repo.references.delete(key)
1034 1034
1035 1035 @reraise_safe_exceptions
1036 1036 def tag_remove(self, wire, tag_name):
1037 1037 repo_init = self._factory.repo_libgit2(wire)
1038 1038 with repo_init as repo:
1039 key = 'refs/tags/{}'.format(tag_name)
1039 key = f'refs/tags/{tag_name}'
1040 1040 repo.references.delete(key)
1041 1041
1042 1042 @reraise_safe_exceptions
1043 1043 def tree_changes(self, wire, source_id, target_id):
1044 1044 # TODO(marcink): remove this seems it's only used by tests
1045 1045 repo = self._factory.repo(wire)
1046 1046 source = repo[source_id].tree if source_id else None
1047 1047 target = repo[target_id].tree
1048 1048 result = repo.object_store.tree_changes(source, target)
1049 1049 return list(result)
1050 1050
1051 1051 @reraise_safe_exceptions
1052 1052 def tree_and_type_for_path(self, wire, commit_id, path):
1053 1053
1054 1054 cache_on, context_uid, repo_id = self._cache_on(wire)
1055 1055 region = self._region(wire)
1056 1056
1057 1057 @region.conditional_cache_on_arguments(condition=cache_on)
1058 1058 def _tree_and_type_for_path(_context_uid, _repo_id, _commit_id, _path):
1059 1059 repo_init = self._factory.repo_libgit2(wire)
1060 1060
1061 1061 with repo_init as repo:
1062 1062 commit = repo[commit_id]
1063 1063 try:
1064 1064 tree = commit.tree[path]
1065 1065 except KeyError:
1066 1066 return None, None, None
1067 1067
1068 1068 return tree.id.hex, tree.type_str, tree.filemode
1069 1069 return _tree_and_type_for_path(context_uid, repo_id, commit_id, path)
1070 1070
1071 1071 @reraise_safe_exceptions
1072 1072 def tree_items(self, wire, tree_id):
1073 1073 cache_on, context_uid, repo_id = self._cache_on(wire)
1074 1074 region = self._region(wire)
1075 1075
1076 1076 @region.conditional_cache_on_arguments(condition=cache_on)
1077 1077 def _tree_items(_repo_id, _tree_id):
1078 1078
1079 1079 repo_init = self._factory.repo_libgit2(wire)
1080 1080 with repo_init as repo:
1081 1081 try:
1082 1082 tree = repo[tree_id]
1083 1083 except KeyError:
1084 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1084 raise ObjectMissing(f'No tree with id: {tree_id}')
1085 1085
1086 1086 result = []
1087 1087 for item in tree:
1088 1088 item_sha = item.hex
1089 1089 item_mode = item.filemode
1090 1090 item_type = item.type_str
1091 1091
1092 1092 if item_type == 'commit':
1093 1093                     # NOTE(marcink): we translate submodules to 'link' for backward compat
1094 1094 item_type = 'link'
1095 1095
1096 1096 result.append((item.name, item_mode, item_sha, item_type))
1097 1097 return result
1098 1098 return _tree_items(repo_id, tree_id)
1099 1099
1100 1100 @reraise_safe_exceptions
1101 1101 def diff_2(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1102 1102 """
1103 1103 Old version that uses subprocess to call diff
1104 1104 """
1105 1105
1106 1106 flags = [
1107 1107 '-U%s' % context, '--patch',
1108 1108 '--binary',
1109 1109 '--find-renames',
1110 1110 '--no-indent-heuristic',
1111 1111 # '--indent-heuristic',
1112 1112 #'--full-index',
1113 1113 #'--abbrev=40'
1114 1114 ]
1115 1115
1116 1116 if opt_ignorews:
1117 1117 flags.append('--ignore-all-space')
1118 1118
1119 1119 if commit_id_1 == self.EMPTY_COMMIT:
1120 1120 cmd = ['show'] + flags + [commit_id_2]
1121 1121 else:
1122 1122 cmd = ['diff'] + flags + [commit_id_1, commit_id_2]
1123 1123
1124 1124 if file_filter:
1125 1125 cmd.extend(['--', file_filter])
1126 1126
1127 1127 diff, __ = self.run_git_command(wire, cmd)
1128 1128 # If we used 'show' command, strip first few lines (until actual diff
1129 1129 # starts)
1130 1130 if commit_id_1 == self.EMPTY_COMMIT:
1131 1131 lines = diff.splitlines()
1132 1132 x = 0
1133 1133 for line in lines:
1134 1134 if line.startswith(b'diff'):
1135 1135 break
1136 1136 x += 1
1137 1137             # Append a newline just like the 'diff' command does
1138 1138             diff = b'\n'.join(lines[x:]) + b'\n'
1139 1139 return diff
1140 1140
1141 1141 @reraise_safe_exceptions
1142 1142 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_ignorews, context):
1143 1143 repo_init = self._factory.repo_libgit2(wire)
1144 1144
1145 1145 with repo_init as repo:
1146 1146 swap = True
1147 1147 flags = 0
1148 1148 flags |= pygit2.GIT_DIFF_SHOW_BINARY
1149 1149
1150 1150 if opt_ignorews:
1151 1151 flags |= pygit2.GIT_DIFF_IGNORE_WHITESPACE
1152 1152
1153 1153 if commit_id_1 == self.EMPTY_COMMIT:
1154 1154 comm1 = repo[commit_id_2]
1155 1155 diff_obj = comm1.tree.diff_to_tree(
1156 1156 flags=flags, context_lines=context, swap=swap)
1157 1157
1158 1158 else:
1159 1159 comm1 = repo[commit_id_2]
1160 1160 comm2 = repo[commit_id_1]
1161 1161 diff_obj = comm1.tree.diff_to_tree(
1162 1162 comm2.tree, flags=flags, context_lines=context, swap=swap)
1163 1163 similar_flags = 0
1164 1164 similar_flags |= pygit2.GIT_DIFF_FIND_RENAMES
1165 1165 diff_obj.find_similar(flags=similar_flags)
1166 1166
1167 1167 if file_filter:
1168 1168 for p in diff_obj:
1169 1169 if p.delta.old_file.path == file_filter:
1170 1170 return BinaryEnvelope(p.data) or BinaryEnvelope(b'')
1171 1171                 # no matching path == no diff
1172 1172 return BinaryEnvelope(b'')
1173 1173 return BinaryEnvelope(diff_obj.patch) or BinaryEnvelope(b'')
1174 1174
1175 1175 @reraise_safe_exceptions
1176 1176 def node_history(self, wire, commit_id, path, limit):
1177 1177 cache_on, context_uid, repo_id = self._cache_on(wire)
1178 1178 region = self._region(wire)
1179 1179
1180 1180 @region.conditional_cache_on_arguments(condition=cache_on)
1181 1181 def _node_history(_context_uid, _repo_id, _commit_id, _path, _limit):
1182 1182 # optimize for n==1, rev-list is much faster for that use-case
1183 1183 if limit == 1:
1184 1184 cmd = ['rev-list', '-1', commit_id, '--', path]
1185 1185 else:
1186 1186 cmd = ['log']
1187 1187 if limit:
1188 1188 cmd.extend(['-n', str(safe_int(limit, 0))])
1189 1189 cmd.extend(['--pretty=format: %H', '-s', commit_id, '--', path])
1190 1190
1191 1191 output, __ = self.run_git_command(wire, cmd)
1192 1192 commit_ids = re.findall(rb'[0-9a-fA-F]{40}', output)
1193 1193
1194 1194 return [x for x in commit_ids]
1195 1195 return _node_history(context_uid, repo_id, commit_id, path, limit)
1196 1196
1197 1197 @reraise_safe_exceptions
1198 1198 def node_annotate_legacy(self, wire, commit_id, path):
1199 1199 # note: replaced by pygit2 implementation
1200 1200 cmd = ['blame', '-l', '--root', '-r', commit_id, '--', path]
1201 1201 # -l ==> outputs long shas (and we need all 40 characters)
1202 1202 # --root ==> doesn't put '^' character for boundaries
1203 1203 # -r commit_id ==> blames for the given commit
1204 1204 output, __ = self.run_git_command(wire, cmd)
1205 1205
1206 1206 result = []
1207 1207 for i, blame_line in enumerate(output.splitlines()[:-1]):
1208 1208 line_no = i + 1
1209 1209 blame_commit_id, line = re.split(rb' ', blame_line, 1)
1210 1210 result.append((line_no, blame_commit_id, line))
1211 1211
1212 1212 return result
1213 1213
1214 1214 @reraise_safe_exceptions
1215 1215 def node_annotate(self, wire, commit_id, path):
1216 1216
1217 1217 result_libgit = []
1218 1218 repo_init = self._factory.repo_libgit2(wire)
1219 1219 with repo_init as repo:
1220 1220 commit = repo[commit_id]
1221 1221 blame_obj = repo.blame(path, newest_commit=commit_id)
1222 1222 for i, line in enumerate(commit.tree[path].data.splitlines()):
1223 1223 line_no = i + 1
1224 1224 hunk = blame_obj.for_line(line_no)
1225 1225 blame_commit_id = hunk.final_commit_id.hex
1226 1226
1227 1227 result_libgit.append((line_no, blame_commit_id, line))
1228 1228
1229 1229 return result_libgit
1230 1230
1231 1231 @reraise_safe_exceptions
1232 1232 def update_server_info(self, wire):
1233 1233 repo = self._factory.repo(wire)
1234 1234 update_server_info(repo)
1235 1235
1236 1236 @reraise_safe_exceptions
1237 1237 def get_all_commit_ids(self, wire):
1238 1238
1239 1239 cache_on, context_uid, repo_id = self._cache_on(wire)
1240 1240 region = self._region(wire)
1241 1241
1242 1242 @region.conditional_cache_on_arguments(condition=cache_on)
1243 1243 def _get_all_commit_ids(_context_uid, _repo_id):
1244 1244
1245 1245 cmd = ['rev-list', '--reverse', '--date-order', '--branches', '--tags']
1246 1246 try:
1247 1247 output, __ = self.run_git_command(wire, cmd)
1248 1248 return output.splitlines()
1249 1249 except Exception:
1250 1250 # Can be raised for empty repositories
1251 1251 return []
1252 1252
1253 1253 @region.conditional_cache_on_arguments(condition=cache_on)
1254 1254 def _get_all_commit_ids_pygit2(_context_uid, _repo_id):
1255 1255 repo_init = self._factory.repo_libgit2(wire)
1256 1256 from pygit2 import GIT_SORT_REVERSE, GIT_SORT_TIME, GIT_BRANCH_ALL
1257 1257 results = []
1258 1258 with repo_init as repo:
1259 1259 for commit in repo.walk(repo.head.target, GIT_SORT_TIME | GIT_BRANCH_ALL | GIT_SORT_REVERSE):
1260 1260 results.append(commit.id.hex)
1261 1261             return results
1262 1262 
1262 1262 return _get_all_commit_ids(context_uid, repo_id)
1263 1263
1264 1264 @reraise_safe_exceptions
1265 1265 def run_git_command(self, wire, cmd, **opts):
1266 1266 path = wire.get('path', None)
1267 1267
1268 1268 if path and os.path.isdir(path):
1269 1269 opts['cwd'] = path
1270 1270
1271 1271 if '_bare' in opts:
1272 1272 _copts = []
1273 1273 del opts['_bare']
1274 1274 else:
1275 1275 _copts = ['-c', 'core.quotepath=false',]
1276 1276 safe_call = False
1277 1277 if '_safe' in opts:
1278 1278 # no exc on failure
1279 1279 del opts['_safe']
1280 1280 safe_call = True
1281 1281
1282 1282 if '_copts' in opts:
1283 1283 _copts.extend(opts['_copts'] or [])
1284 1284 del opts['_copts']
1285 1285
1286 1286 gitenv = os.environ.copy()
1287 1287 gitenv.update(opts.pop('extra_env', {}))
1288 1288         # need to clean/fix GIT_DIR!
1289 1289 if 'GIT_DIR' in gitenv:
1290 1290 del gitenv['GIT_DIR']
1291 1291 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
1292 1292 gitenv['GIT_DISCOVERY_ACROSS_FILESYSTEM'] = '1'
1293 1293
1294 1294 cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
1295 1295 _opts = {'env': gitenv, 'shell': False}
1296 1296
1297 1297 proc = None
1298 1298 try:
1299 1299 _opts.update(opts)
1300 1300 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
1301 1301
1302 1302 return b''.join(proc), b''.join(proc.stderr)
1303 1303 except OSError as err:
1304 1304 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
1305 1305 tb_err = ("Couldn't run git command (%s).\n"
1306 1306 "Original error was:%s\n"
1307 1307 "Call options:%s\n"
1308 1308 % (cmd, err, _opts))
1309 1309 log.exception(tb_err)
1310 1310 if safe_call:
1311 1311 return '', err
1312 1312 else:
1313 1313 raise exceptions.VcsException()(tb_err)
1314 1314 finally:
1315 1315 if proc:
1316 1316 proc.close()
1317 1317
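# Hedged invocation example: the underscore options (_bare, _safe, _copts)
# are consumed by the wrapper itself, everything else reaches
# SubprocessIOChunker. The repository path is illustrative:
#
#     stdout, stderr = self.run_git_command(
#         {'path': '/srv/repos/example.git'}, ['rev-parse', 'HEAD'], _safe=True)
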
1318 1318 @reraise_safe_exceptions
1319 1319 def install_hooks(self, wire, force=False):
1320 1320 from vcsserver.hook_utils import install_git_hooks
1321 1321 bare = self.bare(wire)
1322 1322 path = wire['path']
1323 1323 binary_dir = settings.BINARY_DIR
1324 1324         if binary_dir:
1325 1325             executable = os.path.join(binary_dir, 'python3')
1326 1326 return install_git_hooks(path, bare, force_create=force)
1327 1327
1328 1328 @reraise_safe_exceptions
1329 1329 def get_hooks_info(self, wire):
1330 1330 from vcsserver.hook_utils import (
1331 1331 get_git_pre_hook_version, get_git_post_hook_version)
1332 1332 bare = self.bare(wire)
1333 1333 path = wire['path']
1334 1334 return {
1335 1335 'pre_version': get_git_pre_hook_version(path, bare),
1336 1336 'post_version': get_git_post_hook_version(path, bare),
1337 1337 }
1338 1338
1339 1339 @reraise_safe_exceptions
1340 1340 def set_head_ref(self, wire, head_name):
1341 1341 log.debug('Setting refs/head to `%s`', head_name)
1342 1342 repo_init = self._factory.repo_libgit2(wire)
1343 1343 with repo_init as repo:
1344 1344 repo.set_head(f'refs/heads/{head_name}')
1345 1345
1346 1346 return [head_name] + [f'set HEAD to refs/heads/{head_name}']
1347 1347
1348 1348 @reraise_safe_exceptions
1349 1349 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1350 1350 archive_dir_name, commit_id):
1351 1351
1352 1352 def file_walker(_commit_id, path):
1353 1353 repo_init = self._factory.repo_libgit2(wire)
1354 1354
1355 1355 with repo_init as repo:
1356 1356 commit = repo[commit_id]
1357 1357
1358 1358 if path in ['', '/']:
1359 1359 tree = commit.tree
1360 1360 else:
1361 1361 tree = commit.tree[path.rstrip('/')]
1362 1362 tree_id = tree.id.hex
1363 1363 try:
1364 1364 tree = repo[tree_id]
1365 1365 except KeyError:
1366 raise ObjectMissing('No tree with id: {}'.format(tree_id))
1366 raise ObjectMissing(f'No tree with id: {tree_id}')
1367 1367
1368 1368 index = LibGit2Index.Index()
1369 1369 index.read_tree(tree)
1370 1370 file_iter = index
1371 1371
1372 1372 for file_node in file_iter:
1373 1373 file_path = file_node.path
1374 1374 mode = file_node.mode
1375 1375 is_link = stat.S_ISLNK(mode)
1376 1376 if mode == pygit2.GIT_FILEMODE_COMMIT:
1377 1377 log.debug('Skipping path %s as a commit node', file_path)
1378 1378 continue
1379 1379 yield ArchiveNode(file_path, mode, is_link, repo[file_node.hex].read_raw)
1380 1380
1381 1381 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1382 1382 archive_dir_name, commit_id)
@@ -1,1105 +1,1105 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import binascii
18 18 import io
19 19 import logging
20 20 import stat
21 21 import urllib.request
22 22 import urllib.parse
23 23 import traceback
24 24 import hashlib
25 25
26 26 from hgext import largefiles, rebase, purge
27 27
28 28 from mercurial import commands
29 29 from mercurial import unionrepo
30 30 from mercurial import verify
31 31 from mercurial import repair
32 32
33 33 import vcsserver
34 34 from vcsserver import exceptions
35 35 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, archive_repo, ArchiveNode, BinaryEnvelope
36 36 from vcsserver.hgcompat import (
37 37 archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx,
38 38 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler,
39 39 makepeer, instance, match, memctx, exchange, memfilectx, nullrev, hg_merge,
40 40 patch, peer, revrange, ui, hg_tag, Abort, LookupError, RepoError,
41 41 RepoLookupError, InterventionRequired, RequirementError,
42 42 alwaysmatcher, patternmatcher, hgutil, hgext_strip)
43 43 from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes
44 44 from vcsserver.vcs_base import RemoteBase
45 45
46 46 log = logging.getLogger(__name__)
47 47
48 48
49 49 def make_ui_from_config(repo_config):
50 50
51 51 class LoggingUI(ui.ui):
52 52
53 53 def status(self, *msg, **opts):
54 54 str_msg = map(safe_str, msg)
55 55 log.info(' '.join(str_msg).rstrip('\n'))
56 56 #super(LoggingUI, self).status(*msg, **opts)
57 57
58 58 def warn(self, *msg, **opts):
59 59 str_msg = map(safe_str, msg)
60 60 log.warning('ui_logger:'+' '.join(str_msg).rstrip('\n'))
61 61 #super(LoggingUI, self).warn(*msg, **opts)
62 62
63 63 def error(self, *msg, **opts):
64 64 str_msg = map(safe_str, msg)
65 65 log.error('ui_logger:'+' '.join(str_msg).rstrip('\n'))
66 66 #super(LoggingUI, self).error(*msg, **opts)
67 67
68 68 def note(self, *msg, **opts):
69 69 str_msg = map(safe_str, msg)
70 70 log.info('ui_logger:'+' '.join(str_msg).rstrip('\n'))
71 71 #super(LoggingUI, self).note(*msg, **opts)
72 72
73 73 def debug(self, *msg, **opts):
74 74 str_msg = map(safe_str, msg)
75 75 log.debug('ui_logger:'+' '.join(str_msg).rstrip('\n'))
76 76 #super(LoggingUI, self).debug(*msg, **opts)
77 77
78 78 baseui = LoggingUI()
79 79
80 80 # clean the baseui object
81 81 baseui._ocfg = hgconfig.config()
82 82 baseui._ucfg = hgconfig.config()
83 83 baseui._tcfg = hgconfig.config()
84 84
85 85 for section, option, value in repo_config:
86 86 baseui.setconfig(ascii_bytes(section), ascii_bytes(option), ascii_bytes(value))
87 87
88 88 # make our hgweb quiet so it doesn't print output
89 89 baseui.setconfig(b'ui', b'quiet', b'true')
90 90
91 91 baseui.setconfig(b'ui', b'paginate', b'never')
92 92     # for better error reporting from Mercurial
93 93 baseui.setconfig(b'ui', b'message-output', b'stderr')
94 94
95 95 # force mercurial to only use 1 thread, otherwise it may try to set a
96 96 # signal in a non-main thread, thus generating a ValueError.
97 97 baseui.setconfig(b'worker', b'numcpus', 1)
98 98
99 99 # If there is no config for the largefiles extension, we explicitly disable
100 100     # it here. This overrides settings from the repository's hgrc file. Recent
101 101     # mercurial versions enable largefiles in hgrc on clone from a largefile
102 102     # repo.
103 103 if not baseui.hasconfig(b'extensions', b'largefiles'):
104 104 log.debug('Explicitly disable largefiles extension for repo.')
105 105 baseui.setconfig(b'extensions', b'largefiles', b'!')
106 106
107 107 return baseui
108 108
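# Usage sketch: repo_config is an iterable of (section, option, value)
# triples; the example values are invented:
#
#     baseui = make_ui_from_config([
#         ('ui', 'username', 'RhodeCode <noreply@example.com>'),
#         ('phases', 'publish', 'true'),
#     ])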
109 109
110 110 def reraise_safe_exceptions(func):
111 111 """Decorator for converting mercurial exceptions to something neutral."""
112 112
113 113 def wrapper(*args, **kwargs):
114 114 try:
115 115 return func(*args, **kwargs)
116 116 except (Abort, InterventionRequired) as e:
117 117 raise_from_original(exceptions.AbortException(e), e)
118 118 except RepoLookupError as e:
119 119 raise_from_original(exceptions.LookupException(e), e)
120 120 except RequirementError as e:
121 121 raise_from_original(exceptions.RequirementException(e), e)
122 122 except RepoError as e:
123 123 raise_from_original(exceptions.VcsException(e), e)
124 124 except LookupError as e:
125 125 raise_from_original(exceptions.LookupException(e), e)
126 126 except Exception as e:
127 127 if not hasattr(e, '_vcs_kind'):
128 128 log.exception("Unhandled exception in hg remote call")
129 129 raise_from_original(exceptions.UnhandledException(e), e)
130 130
131 131 raise
132 132 return wrapper
133 133
134 134
135 135 class MercurialFactory(RepoFactory):
136 136 repo_type = 'hg'
137 137
138 138 def _create_config(self, config, hooks=True):
139 139 if not hooks:
140 140 hooks_to_clean = frozenset((
141 141 'changegroup.repo_size', 'preoutgoing.pre_pull',
142 142 'outgoing.pull_logger', 'prechangegroup.pre_push'))
143 143 new_config = []
144 144 for section, option, value in config:
145 145 if section == 'hooks' and option in hooks_to_clean:
146 146 continue
147 147 new_config.append((section, option, value))
148 148 config = new_config
149 149
150 150 baseui = make_ui_from_config(config)
151 151 return baseui
152 152
153 153 def _create_repo(self, wire, create):
154 154 baseui = self._create_config(wire["config"])
155 155 repo = instance(baseui, safe_bytes(wire["path"]), create)
156 156 log.debug('repository created: got HG object: %s', repo)
157 157 return repo
158 158
159 159 def repo(self, wire, create=False):
160 160 """
161 161 Get a repository instance for the given path.
162 162 """
163 163 return self._create_repo(wire, create)
164 164
165 165
166 166 def patch_ui_message_output(baseui):
167 167 baseui.setconfig(b'ui', b'quiet', b'false')
168 168 output = io.BytesIO()
169 169
170 170 def write(data, **unused_kwargs):
171 171 output.write(data)
172 172
173 173 baseui.status = write
174 174 baseui.write = write
175 175 baseui.warn = write
176 176 baseui.debug = write
177 177
178 178 return baseui, output
179 179
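# Usage sketch: route Mercurial's ui output into a buffer instead of stdout:
#
#     baseui, output = patch_ui_message_output(make_ui_from_config([]))
#     ...  # run a mercurial command with this ui
#     captured = output.getvalue()  # bytes written via status/write/warn/debug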
180 180
181 181 class HgRemote(RemoteBase):
182 182
183 183 def __init__(self, factory):
184 184 self._factory = factory
185 185 self._bulk_methods = {
186 186 "affected_files": self.ctx_files,
187 187 "author": self.ctx_user,
188 188 "branch": self.ctx_branch,
189 189 "children": self.ctx_children,
190 190 "date": self.ctx_date,
191 191 "message": self.ctx_description,
192 192 "parents": self.ctx_parents,
193 193 "status": self.ctx_status,
194 194 "obsolete": self.ctx_obsolete,
195 195 "phase": self.ctx_phase,
196 196 "hidden": self.ctx_hidden,
197 197 "_file_paths": self.ctx_list,
198 198 }
199 199
200 200 def _get_ctx(self, repo, ref):
201 201 return get_ctx(repo, ref)
202 202
203 203 @reraise_safe_exceptions
204 204 def discover_hg_version(self):
205 205 from mercurial import util
206 206 return safe_str(util.version())
207 207
208 208 @reraise_safe_exceptions
209 209 def is_empty(self, wire):
210 210 repo = self._factory.repo(wire)
211 211
212 212 try:
213 213 return len(repo) == 0
214 214 except Exception:
215 215 log.exception("failed to read object_store")
216 216 return False
217 217
218 218 @reraise_safe_exceptions
219 219 def bookmarks(self, wire):
220 220 cache_on, context_uid, repo_id = self._cache_on(wire)
221 221 region = self._region(wire)
222 222
223 223 @region.conditional_cache_on_arguments(condition=cache_on)
224 224 def _bookmarks(_context_uid, _repo_id):
225 225 repo = self._factory.repo(wire)
226 226 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo._bookmarks.items()}
227 227
228 228 return _bookmarks(context_uid, repo_id)
229 229
230 230 @reraise_safe_exceptions
231 231 def branches(self, wire, normal, closed):
232 232 cache_on, context_uid, repo_id = self._cache_on(wire)
233 233 region = self._region(wire)
234 234
235 235 @region.conditional_cache_on_arguments(condition=cache_on)
236 236 def _branches(_context_uid, _repo_id, _normal, _closed):
237 237 repo = self._factory.repo(wire)
238 238 iter_branches = repo.branchmap().iterbranches()
239 239 bt = {}
240 240 for branch_name, _heads, tip_node, is_closed in iter_branches:
241 241 if normal and not is_closed:
242 242 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
243 243 if closed and is_closed:
244 244 bt[safe_str(branch_name)] = ascii_str(hex(tip_node))
245 245
246 246 return bt
247 247
248 248 return _branches(context_uid, repo_id, normal, closed)
249 249
250 250 @reraise_safe_exceptions
251 251 def bulk_request(self, wire, commit_id, pre_load):
252 252 cache_on, context_uid, repo_id = self._cache_on(wire)
253 253 region = self._region(wire)
254 254
255 255 @region.conditional_cache_on_arguments(condition=cache_on)
256 256 def _bulk_request(_repo_id, _commit_id, _pre_load):
257 257 result = {}
258 258 for attr in pre_load:
259 259 try:
260 260 method = self._bulk_methods[attr]
261 261 wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache
262 262 result[attr] = method(wire, commit_id)
263 263 except KeyError as e:
264 264 raise exceptions.VcsException(e)(
265 265 f'Unknown bulk attribute: "{attr}"')
266 266 return result
267 267
268 268 return _bulk_request(repo_id, commit_id, sorted(pre_load))
269 269
270 270 @reraise_safe_exceptions
271 271 def ctx_branch(self, wire, commit_id):
272 272 cache_on, context_uid, repo_id = self._cache_on(wire)
273 273 region = self._region(wire)
274 274
275 275 @region.conditional_cache_on_arguments(condition=cache_on)
276 276 def _ctx_branch(_repo_id, _commit_id):
277 277 repo = self._factory.repo(wire)
278 278 ctx = self._get_ctx(repo, commit_id)
279 279 return ctx.branch()
280 280 return _ctx_branch(repo_id, commit_id)
281 281
282 282 @reraise_safe_exceptions
283 283 def ctx_date(self, wire, commit_id):
284 284 cache_on, context_uid, repo_id = self._cache_on(wire)
285 285 region = self._region(wire)
286 286
287 287 @region.conditional_cache_on_arguments(condition=cache_on)
288 288 def _ctx_date(_repo_id, _commit_id):
289 289 repo = self._factory.repo(wire)
290 290 ctx = self._get_ctx(repo, commit_id)
291 291 return ctx.date()
292 292 return _ctx_date(repo_id, commit_id)
293 293
294 294 @reraise_safe_exceptions
295 295 def ctx_description(self, wire, revision):
296 296 repo = self._factory.repo(wire)
297 297 ctx = self._get_ctx(repo, revision)
298 298 return ctx.description()
299 299
300 300 @reraise_safe_exceptions
301 301 def ctx_files(self, wire, commit_id):
302 302 cache_on, context_uid, repo_id = self._cache_on(wire)
303 303 region = self._region(wire)
304 304
305 305 @region.conditional_cache_on_arguments(condition=cache_on)
306 306 def _ctx_files(_repo_id, _commit_id):
307 307 repo = self._factory.repo(wire)
308 308 ctx = self._get_ctx(repo, commit_id)
309 309 return ctx.files()
310 310
311 311 return _ctx_files(repo_id, commit_id)
312 312
313 313 @reraise_safe_exceptions
314 314 def ctx_list(self, path, revision):
315 315 repo = self._factory.repo(path)
316 316 ctx = self._get_ctx(repo, revision)
317 317 return list(ctx)
318 318
319 319 @reraise_safe_exceptions
320 320 def ctx_parents(self, wire, commit_id):
321 321 cache_on, context_uid, repo_id = self._cache_on(wire)
322 322 region = self._region(wire)
323 323
324 324 @region.conditional_cache_on_arguments(condition=cache_on)
325 325 def _ctx_parents(_repo_id, _commit_id):
326 326 repo = self._factory.repo(wire)
327 327 ctx = self._get_ctx(repo, commit_id)
328 328 return [parent.hex() for parent in ctx.parents()
329 329 if not (parent.hidden() or parent.obsolete())]
330 330
331 331 return _ctx_parents(repo_id, commit_id)
332 332
333 333 @reraise_safe_exceptions
334 334 def ctx_children(self, wire, commit_id):
335 335 cache_on, context_uid, repo_id = self._cache_on(wire)
336 336 region = self._region(wire)
337 337
338 338 @region.conditional_cache_on_arguments(condition=cache_on)
339 339 def _ctx_children(_repo_id, _commit_id):
340 340 repo = self._factory.repo(wire)
341 341 ctx = self._get_ctx(repo, commit_id)
342 342 return [child.hex() for child in ctx.children()
343 343 if not (child.hidden() or child.obsolete())]
344 344
345 345 return _ctx_children(repo_id, commit_id)
346 346
347 347 @reraise_safe_exceptions
348 348 def ctx_phase(self, wire, commit_id):
349 349 cache_on, context_uid, repo_id = self._cache_on(wire)
350 350 region = self._region(wire)
351 351
352 352 @region.conditional_cache_on_arguments(condition=cache_on)
353 353 def _ctx_phase(_context_uid, _repo_id, _commit_id):
354 354 repo = self._factory.repo(wire)
355 355 ctx = self._get_ctx(repo, commit_id)
356 356 # public=0, draft=1, secret=2
357 357 return ctx.phase()
358 358 return _ctx_phase(context_uid, repo_id, commit_id)
359 359
360 360 @reraise_safe_exceptions
361 361 def ctx_obsolete(self, wire, commit_id):
362 362 cache_on, context_uid, repo_id = self._cache_on(wire)
363 363 region = self._region(wire)
364 364
365 365 @region.conditional_cache_on_arguments(condition=cache_on)
366 366 def _ctx_obsolete(_context_uid, _repo_id, _commit_id):
367 367 repo = self._factory.repo(wire)
368 368 ctx = self._get_ctx(repo, commit_id)
369 369 return ctx.obsolete()
370 370 return _ctx_obsolete(context_uid, repo_id, commit_id)
371 371
372 372 @reraise_safe_exceptions
373 373 def ctx_hidden(self, wire, commit_id):
374 374 cache_on, context_uid, repo_id = self._cache_on(wire)
375 375 region = self._region(wire)
376 376
377 377 @region.conditional_cache_on_arguments(condition=cache_on)
378 378 def _ctx_hidden(_context_uid, _repo_id, _commit_id):
379 379 repo = self._factory.repo(wire)
380 380 ctx = self._get_ctx(repo, commit_id)
381 381 return ctx.hidden()
382 382 return _ctx_hidden(context_uid, repo_id, commit_id)
383 383
384 384 @reraise_safe_exceptions
385 385 def ctx_substate(self, wire, revision):
386 386 repo = self._factory.repo(wire)
387 387 ctx = self._get_ctx(repo, revision)
388 388 return ctx.substate
389 389
390 390 @reraise_safe_exceptions
391 391 def ctx_status(self, wire, revision):
392 392 repo = self._factory.repo(wire)
393 393 ctx = self._get_ctx(repo, revision)
394 394 status = repo[ctx.p1().node()].status(other=ctx.node())
395 395 # the status object (an odd, custom named tuple in mercurial) is not
396 396 # correctly serializable; we convert it to a list, as the underlying
397 397 # API expects a list
398 398 return list(status)
399 399
400 400 @reraise_safe_exceptions
401 401 def ctx_user(self, wire, revision):
402 402 repo = self._factory.repo(wire)
403 403 ctx = self._get_ctx(repo, revision)
404 404 return ctx.user()
405 405
406 406 @reraise_safe_exceptions
407 407 def check_url(self, url, config):
408 408 _proto = None
409 409 if '+' in url[:url.find('://')]:
410 410 _proto = url[0:url.find('+')]
411 411 url = url[url.find('+') + 1:]
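# e.g. 'svn+http://host/repo' -> _proto='svn', url='http://host/repo'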
412 412 handlers = []
413 413 url_obj = url_parser(url)
414 414 test_uri, authinfo = url_obj.authinfo()
415 415 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
416 416 url_obj.query = obfuscate_qs(url_obj.query)
417 417
418 418 cleaned_uri = str(url_obj)
419 419 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
420 420
421 421 if authinfo:
422 422 # create a password manager
423 423 passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
424 424 passmgr.add_password(*authinfo)
425 425
426 426 handlers.extend((httpbasicauthhandler(passmgr),
427 427 httpdigestauthhandler(passmgr)))
428 428
429 429 o = urllib.request.build_opener(*handlers)
430 430 o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
431 431 ('Accept', 'application/mercurial-0.1')]
432 432
433 433 q = {"cmd": 'between'}
434 q.update({'pairs': "%s-%s" % ('0' * 40, '0' * 40)})
434 q.update({'pairs': "{}-{}".format('0' * 40, '0' * 40)})
435 435 qs = '?%s' % urllib.parse.urlencode(q)
436 cu = "%s%s" % (test_uri, qs)
436 cu = "{}{}".format(test_uri, qs)
437 437 req = urllib.request.Request(cu, None, {})
438 438
439 439 try:
440 440 log.debug("Trying to open URL %s", cleaned_uri)
441 441 resp = o.open(req)
442 442 if resp.code != 200:
443 443 raise exceptions.URLError()('Return Code is not 200')
444 444 except Exception as e:
445 445 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
446 446 # means it cannot be cloned
447 raise exceptions.URLError(e)("[%s] org_exc: %s" % (cleaned_uri, e))
447 raise exceptions.URLError(e)("[{}] org_exc: {}".format(cleaned_uri, e))
448 448
449 449 # now check if it's a proper hg repo, but don't do it for svn
450 450 try:
451 451 if _proto == 'svn':
452 452 pass
453 453 else:
454 454 # check for pure hg repos
455 455 log.debug(
456 456 "Verifying if URL is a Mercurial repository: %s",
457 457 cleaned_uri)
458 458 ui = make_ui_from_config(config)
459 459 peer_checker = makepeer(ui, url)
460 460 peer_checker.lookup('tip')
461 461 except Exception as e:
462 462 log.warning("URL is not a valid Mercurial repository: %s",
463 463 cleaned_uri)
464 464 raise exceptions.URLError(e)(
465 465 "url [%s] does not look like an hg repo org_exc: %s"
466 466 % (cleaned_uri, e))
467 467
468 468 log.info("URL is a valid Mercurial repository: %s", cleaned_uri)
469 469 return True
470 470
471 471 @reraise_safe_exceptions
472 472 def diff(self, wire, commit_id_1, commit_id_2, file_filter, opt_git, opt_ignorews, context):
473 473 repo = self._factory.repo(wire)
474 474
475 475 if file_filter:
476 476 # unpack the file-filter
477 477 repo_path, node_path = file_filter
478 478 match_filter = match(safe_bytes(repo_path), b'', [safe_bytes(node_path)])
479 479 else:
480 480 match_filter = file_filter
481 481 opts = diffopts(git=opt_git, ignorews=opt_ignorews, context=context, showfunc=1)
482 482
483 483 try:
484 484 diff_iter = patch.diff(
485 485 repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts)
486 486 return BinaryEnvelope(b"".join(diff_iter))
487 487 except RepoLookupError as e:
488 488 raise exceptions.LookupException(e)()
489 489
490 490 @reraise_safe_exceptions
491 491 def node_history(self, wire, revision, path, limit):
492 492 cache_on, context_uid, repo_id = self._cache_on(wire)
493 493 region = self._region(wire)
494 494
495 495 @region.conditional_cache_on_arguments(condition=cache_on)
496 496 def _node_history(_context_uid, _repo_id, _revision, _path, _limit):
497 497 repo = self._factory.repo(wire)
498 498
499 499 ctx = self._get_ctx(repo, revision)
500 500 fctx = ctx.filectx(safe_bytes(path))
501 501
502 502 def history_iter():
503 503 limit_rev = fctx.rev()
504 504 for obj in reversed(list(fctx.filelog())):
505 505 obj = fctx.filectx(obj)
506 506 ctx = obj.changectx()
507 507 if ctx.hidden() or ctx.obsolete():
508 508 continue
509 509
510 510 if limit_rev >= obj.rev():
511 511 yield obj
512 512
513 513 history = []
514 514 for cnt, obj in enumerate(history_iter()):
515 515 if limit and cnt >= limit:
516 516 break
517 517 history.append(hex(obj.node()))
518 518
519 519 return history
520 520 return _node_history(context_uid, repo_id, revision, path, limit)
521 521
522 522 @reraise_safe_exceptions
523 523 def node_history_untill(self, wire, revision, path, limit):
524 524 cache_on, context_uid, repo_id = self._cache_on(wire)
525 525 region = self._region(wire)
526 526
527 527 @region.conditional_cache_on_arguments(condition=cache_on)
528 528 def _node_history_until(_context_uid, _repo_id, _revision, _path, _limit):
529 529 repo = self._factory.repo(wire)
530 530 ctx = self._get_ctx(repo, revision)
531 531 fctx = ctx.filectx(safe_bytes(path))
532 532
533 533 file_log = list(fctx.filelog())
534 534 if limit:
535 535 # Limit to the last n items
536 536 file_log = file_log[-limit:]
537 537
538 538 return [hex(fctx.filectx(cs).node()) for cs in reversed(file_log)]
539 539 return _node_history_until(context_uid, repo_id, revision, path, limit)
540 540
541 541 @reraise_safe_exceptions
542 542 def fctx_annotate(self, wire, revision, path):
543 543 repo = self._factory.repo(wire)
544 544 ctx = self._get_ctx(repo, revision)
545 545 fctx = ctx.filectx(safe_bytes(path))
546 546
547 547 result = []
548 548 for i, annotate_obj in enumerate(fctx.annotate(), 1):
549 549 ln_no = i
550 550 sha = hex(annotate_obj.fctx.node())
551 551 content = annotate_obj.text
552 552 result.append((ln_no, sha, content))
553 553 return result
554 554
555 555 @reraise_safe_exceptions
556 556 def fctx_node_data(self, wire, revision, path):
557 557 repo = self._factory.repo(wire)
558 558 ctx = self._get_ctx(repo, revision)
559 559 fctx = ctx.filectx(safe_bytes(path))
560 560 return BinaryEnvelope(fctx.data())
561 561
562 562 @reraise_safe_exceptions
563 563 def fctx_flags(self, wire, commit_id, path):
564 564 cache_on, context_uid, repo_id = self._cache_on(wire)
565 565 region = self._region(wire)
566 566
567 567 @region.conditional_cache_on_arguments(condition=cache_on)
568 568 def _fctx_flags(_repo_id, _commit_id, _path):
569 569 repo = self._factory.repo(wire)
570 570 ctx = self._get_ctx(repo, commit_id)
571 571 fctx = ctx.filectx(safe_bytes(path))
572 572 return fctx.flags()
573 573
574 574 return _fctx_flags(repo_id, commit_id, path)
575 575
576 576 @reraise_safe_exceptions
577 577 def fctx_size(self, wire, commit_id, path):
578 578 cache_on, context_uid, repo_id = self._cache_on(wire)
579 579 region = self._region(wire)
580 580
581 581 @region.conditional_cache_on_arguments(condition=cache_on)
582 582 def _fctx_size(_repo_id, _revision, _path):
583 583 repo = self._factory.repo(wire)
584 584 ctx = self._get_ctx(repo, commit_id)
585 585 fctx = ctx.filectx(safe_bytes(path))
586 586 return fctx.size()
587 587 return _fctx_size(repo_id, commit_id, path)
588 588
589 589 @reraise_safe_exceptions
590 590 def get_all_commit_ids(self, wire, name):
591 591 cache_on, context_uid, repo_id = self._cache_on(wire)
592 592 region = self._region(wire)
593 593
594 594 @region.conditional_cache_on_arguments(condition=cache_on)
595 595 def _get_all_commit_ids(_context_uid, _repo_id, _name):
596 596 repo = self._factory.repo(wire)
597 597 revs = [ascii_str(repo[x].hex()) for x in repo.filtered(b'visible').changelog.revs()]
598 598 return revs
599 599 return _get_all_commit_ids(context_uid, repo_id, name)
600 600
601 601 @reraise_safe_exceptions
602 602 def get_config_value(self, wire, section, name, untrusted=False):
603 603 repo = self._factory.repo(wire)
604 604 return repo.ui.config(ascii_bytes(section), ascii_bytes(name), untrusted=untrusted)
605 605
606 606 @reraise_safe_exceptions
607 607 def is_large_file(self, wire, commit_id, path):
608 608 cache_on, context_uid, repo_id = self._cache_on(wire)
609 609 region = self._region(wire)
610 610
611 611 @region.conditional_cache_on_arguments(condition=cache_on)
612 612 def _is_large_file(_context_uid, _repo_id, _commit_id, _path):
613 613 return largefiles.lfutil.isstandin(safe_bytes(path))
614 614
615 615 return _is_large_file(context_uid, repo_id, commit_id, path)
616 616
617 617 @reraise_safe_exceptions
618 618 def is_binary(self, wire, revision, path):
619 619 cache_on, context_uid, repo_id = self._cache_on(wire)
620 620 region = self._region(wire)
621 621
622 622 @region.conditional_cache_on_arguments(condition=cache_on)
623 623 def _is_binary(_repo_id, _sha, _path):
624 624 repo = self._factory.repo(wire)
625 625 ctx = self._get_ctx(repo, revision)
626 626 fctx = ctx.filectx(safe_bytes(path))
627 627 return fctx.isbinary()
628 628
629 629 return _is_binary(repo_id, revision, path)
630 630
631 631 @reraise_safe_exceptions
632 632 def md5_hash(self, wire, revision, path):
633 633 cache_on, context_uid, repo_id = self._cache_on(wire)
634 634 region = self._region(wire)
635 635
636 636 @region.conditional_cache_on_arguments(condition=cache_on)
637 637 def _md5_hash(_repo_id, _sha, _path):
638 638 repo = self._factory.repo(wire)
639 639 ctx = self._get_ctx(repo, revision)
640 640 fctx = ctx.filectx(safe_bytes(path))
641 641 return hashlib.md5(fctx.data()).hexdigest()
642 642
643 643 return _md5_hash(repo_id, revision, path)
644 644
645 645 @reraise_safe_exceptions
646 646 def in_largefiles_store(self, wire, sha):
647 647 repo = self._factory.repo(wire)
648 648 return largefiles.lfutil.instore(repo, sha)
649 649
650 650 @reraise_safe_exceptions
651 651 def in_user_cache(self, wire, sha):
652 652 repo = self._factory.repo(wire)
653 653 return largefiles.lfutil.inusercache(repo.ui, sha)
654 654
655 655 @reraise_safe_exceptions
656 656 def store_path(self, wire, sha):
657 657 repo = self._factory.repo(wire)
658 658 return largefiles.lfutil.storepath(repo, sha)
659 659
660 660 @reraise_safe_exceptions
661 661 def link(self, wire, sha, path):
662 662 repo = self._factory.repo(wire)
663 663 largefiles.lfutil.link(
664 664 largefiles.lfutil.usercachepath(repo.ui, sha), path)
665 665
666 666 @reraise_safe_exceptions
667 667 def localrepository(self, wire, create=False):
668 668 self._factory.repo(wire, create=create)
669 669
670 670 @reraise_safe_exceptions
671 671 def lookup(self, wire, revision, both):
672 672 cache_on, context_uid, repo_id = self._cache_on(wire)
673 673 region = self._region(wire)
674 674
675 675 @region.conditional_cache_on_arguments(condition=cache_on)
676 676 def _lookup(_context_uid, _repo_id, _revision, _both):
677 677
678 678 repo = self._factory.repo(wire)
679 679 rev = _revision
680 680 if isinstance(rev, int):
681 681 # NOTE(marcink):
682 682 # since Mercurial doesn't support negative indexes properly,
683 683 # we need to shift by one to get the proper index, e.g.
684 684 # repo[-1] => repo[-2]
685 685 # repo[0] => repo[-1]
686 686 if rev <= 0:
687 687 rev = rev - 1
688 688 try:
689 689 ctx = self._get_ctx(repo, rev)
690 690 except (TypeError, RepoLookupError, binascii.Error) as e:
691 691 e._org_exc_tb = traceback.format_exc()
692 692 raise exceptions.LookupException(e)(rev)
693 693 except LookupError as e:
694 694 e._org_exc_tb = traceback.format_exc()
695 695 raise exceptions.LookupException(e)(e.name)
696 696
697 697 if not both:
698 698 return ctx.hex()
699 699
700 700 ctx = repo[ctx.hex()]
701 701 return ctx.hex(), ctx.rev()
702 702
703 703 return _lookup(context_uid, repo_id, revision, both)
704 704
705 705 @reraise_safe_exceptions
706 706 def sync_push(self, wire, url):
707 707 if not self.check_url(url, wire['config']):
708 708 return
709 709
710 710 repo = self._factory.repo(wire)
711 711
712 712 # Disable any prompts for this repo
713 713 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
714 714
715 715 bookmarks = list(dict(repo._bookmarks).keys())
716 716 remote = peer(repo, {}, safe_bytes(url))
717 717 # Disable any prompts for this remote
718 718 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
719 719
720 720 return exchange.push(
721 721 repo, remote, newbranch=True, bookmarks=bookmarks).cgresult
722 722
723 723 @reraise_safe_exceptions
724 724 def revision(self, wire, rev):
725 725 repo = self._factory.repo(wire)
726 726 ctx = self._get_ctx(repo, rev)
727 727 return ctx.rev()
728 728
729 729 @reraise_safe_exceptions
730 730 def rev_range(self, wire, commit_filter):
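# commit_filter is a list of revset expressions (encoded to bytes below)
# that mercurial's revrange resolves into commit hashes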
731 731 cache_on, context_uid, repo_id = self._cache_on(wire)
732 732 region = self._region(wire)
733 733
734 734 @region.conditional_cache_on_arguments(condition=cache_on)
735 735 def _rev_range(_context_uid, _repo_id, _filter):
736 736 repo = self._factory.repo(wire)
737 737 revisions = [
738 738 ascii_str(repo[rev].hex())
739 739 for rev in revrange(repo, list(map(ascii_bytes, commit_filter)))
740 740 ]
741 741 return revisions
742 742
743 743 return _rev_range(context_uid, repo_id, sorted(commit_filter))
744 744
745 745 @reraise_safe_exceptions
746 746 def rev_range_hash(self, wire, node):
747 747 repo = self._factory.repo(wire)
748 748
749 749 def get_revs(repo, rev_opt):
750 750 if rev_opt:
751 751 revs = revrange(repo, rev_opt)
752 752 if len(revs) == 0:
753 753 return (nullrev, nullrev)
754 754 return max(revs), min(revs)
755 755 else:
756 756 return len(repo) - 1, 0
757 757
758 758 stop, start = get_revs(repo, [safe_bytes(node + ':')])
759 759 revs = [ascii_str(repo[r].hex()) for r in range(start, stop + 1)]
760 760 return revs
761 761
762 762 @reraise_safe_exceptions
763 763 def revs_from_revspec(self, wire, rev_spec, *args, **kwargs):
764 764 org_path = safe_bytes(wire["path"])
765 765 other_path = safe_bytes(kwargs.pop('other_path', ''))
766 766
767 767 # case when we want to compare two independent repositories
768 768 if other_path and other_path != wire["path"]:
769 769 baseui = self._factory._create_config(wire["config"])
770 770 repo = unionrepo.makeunionrepository(baseui, other_path, org_path)
771 771 else:
772 772 repo = self._factory.repo(wire)
773 773 return list(repo.revs(rev_spec, *args))
774 774
775 775 @reraise_safe_exceptions
776 776 def verify(self, wire,):
777 777 repo = self._factory.repo(wire)
778 778 baseui = self._factory._create_config(wire['config'])
779 779
780 780 baseui, output = patch_ui_message_output(baseui)
781 781
782 782 repo.ui = baseui
783 783 verify.verify(repo)
784 784 return output.getvalue()
785 785
786 786 @reraise_safe_exceptions
787 787 def hg_update_cache(self, wire,):
788 788 repo = self._factory.repo(wire)
789 789 baseui = self._factory._create_config(wire['config'])
790 790 baseui, output = patch_ui_message_output(baseui)
791 791
792 792 repo.ui = baseui
793 793 with repo.wlock(), repo.lock():
794 794 repo.updatecaches(full=True)
795 795
796 796 return output.getvalue()
797 797
798 798 @reraise_safe_exceptions
799 799 def hg_rebuild_fn_cache(self, wire,):
800 800 repo = self._factory.repo(wire)
801 801 baseui = self._factory._create_config(wire['config'])
802 802 baseui, output = patch_ui_message_output(baseui)
803 803
804 804 repo.ui = baseui
805 805
806 806 repair.rebuildfncache(baseui, repo)
807 807
808 808 return output.getvalue()
809 809
810 810 @reraise_safe_exceptions
811 811 def tags(self, wire):
812 812 cache_on, context_uid, repo_id = self._cache_on(wire)
813 813 region = self._region(wire)
814 814
815 815 @region.conditional_cache_on_arguments(condition=cache_on)
816 816 def _tags(_context_uid, _repo_id):
817 817 repo = self._factory.repo(wire)
818 818 return {safe_str(name): ascii_str(hex(sha)) for name, sha in repo.tags().items()}
819 819
820 820 return _tags(context_uid, repo_id)
821 821
822 822 @reraise_safe_exceptions
823 823 def update(self, wire, node='', clean=False):
824 824 repo = self._factory.repo(wire)
825 825 baseui = self._factory._create_config(wire['config'])
826 826 node = safe_bytes(node)
827 827
828 828 commands.update(baseui, repo, node=node, clean=clean)
829 829
830 830 @reraise_safe_exceptions
831 831 def identify(self, wire):
832 832 repo = self._factory.repo(wire)
833 833 baseui = self._factory._create_config(wire['config'])
834 834 output = io.BytesIO()
835 835 baseui.write = output.write
836 836 # This is required to get a full node id
837 837 baseui.debugflag = True
838 838 commands.identify(baseui, repo, id=True)
839 839
840 840 return output.getvalue()
841 841
842 842 @reraise_safe_exceptions
843 843 def heads(self, wire, branch=None):
844 844 repo = self._factory.repo(wire)
845 845 baseui = self._factory._create_config(wire['config'])
846 846 output = io.BytesIO()
847 847
848 848 def write(data, **unused_kwargs):
849 849 output.write(data)
850 850
851 851 baseui.write = write
852 852 if branch:
853 853 args = [safe_bytes(branch)]
854 854 else:
855 855 args = []
856 856 commands.heads(baseui, repo, template=b'{node} ', *args)
857 857
858 858 return output.getvalue()
859 859
860 860 @reraise_safe_exceptions
861 861 def ancestor(self, wire, revision1, revision2):
862 862 repo = self._factory.repo(wire)
863 863 changelog = repo.changelog
864 864 lookup = repo.lookup
865 865 a = changelog.ancestor(lookup(safe_bytes(revision1)), lookup(safe_bytes(revision2)))
866 866 return hex(a)
867 867
868 868 @reraise_safe_exceptions
869 869 def clone(self, wire, source, dest, update_after_clone=False, hooks=True):
870 870 baseui = self._factory._create_config(wire["config"], hooks=hooks)
871 871 clone(baseui, safe_bytes(source), safe_bytes(dest), noupdate=not update_after_clone)
872 872
873 873 @reraise_safe_exceptions
874 874 def commitctx(self, wire, message, parents, commit_time, commit_timezone, user, files, extra, removed, updated):
875 875
876 876 repo = self._factory.repo(wire)
877 877 baseui = self._factory._create_config(wire['config'])
878 878 publishing = baseui.configbool(b'phases', b'publish')
879 879
880 880 def _filectxfn(_repo, ctx, path: bytes):
881 881 """
882 882 Marks given path as added/changed/removed in a given _repo. This is
883 883 for internal mercurial commit function.
884 884 """
885 885
886 886 # check if this path is removed
887 887 if safe_str(path) in removed:
888 888 # returning None is a way to mark node for removal
889 889 return None
890 890
891 891 # check if this path was added or changed
892 892 for node in updated:
893 893 if safe_bytes(node['path']) == path:
894 894 return memfilectx(
895 895 _repo,
896 896 changectx=ctx,
897 897 path=safe_bytes(node['path']),
898 898 data=safe_bytes(node['content']),
899 899 islink=False,
900 900 isexec=bool(node['mode'] & stat.S_IXUSR),
901 901 copysource=False)
902 902 abort_exc = exceptions.AbortException()
903 903 raise abort_exc(f"Given path haven't been marked as added, changed or removed ({path})")
904 904
905 905 if publishing:
906 906 new_commit_phase = b'public'
907 907 else:
908 908 new_commit_phase = b'draft'
909 909 with repo.ui.configoverride({(b'phases', b'new-commit'): new_commit_phase}):
910 910 kwargs = {safe_bytes(k): safe_bytes(v) for k, v in extra.items()}
911 911 commit_ctx = memctx(
912 912 repo=repo,
913 913 parents=parents,
914 914 text=safe_bytes(message),
915 915 files=[safe_bytes(x) for x in files],
916 916 filectxfn=_filectxfn,
917 917 user=safe_bytes(user),
918 918 date=(commit_time, commit_timezone),
919 919 extra=kwargs)
920 920
921 921 n = repo.commitctx(commit_ctx)
922 922 new_id = hex(n)
923 923
924 924 return new_id
925 925
926 926 @reraise_safe_exceptions
927 927 def pull(self, wire, url, commit_ids=None):
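# low-level pull over the peer protocol; pull_cmd below goes through the
# `hg pull` command instead, with bookmark/branch/rev option handling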
928 928 repo = self._factory.repo(wire)
929 929 # Disable any prompts for this repo
930 930 repo.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
931 931
932 932 remote = peer(repo, {}, safe_bytes(url))
933 933 # Disable any prompts for this remote
934 934 remote.ui.setconfig(b'ui', b'interactive', b'off', b'-y')
935 935
936 936 if commit_ids:
937 937 commit_ids = [bin(commit_id) for commit_id in commit_ids]
938 938
939 939 return exchange.pull(
940 940 repo, remote, heads=commit_ids, force=None).cgresult
941 941
942 942 @reraise_safe_exceptions
943 943 def pull_cmd(self, wire, source, bookmark='', branch='', revision='', hooks=True):
944 944 repo = self._factory.repo(wire)
945 945 baseui = self._factory._create_config(wire['config'], hooks=hooks)
946 946
947 947 source = safe_bytes(source)
948 948
949 949 # Mercurial internally has a lot of logic that checks ONLY whether
950 950 # an option is defined, so we pass only the options that are actually set
951 951 opts = {}
952 952 if bookmark:
953 953 if isinstance(bookmark, list):
954 954 bookmark = [safe_bytes(x) for x in bookmark]
955 955 else:
956 956 bookmark = safe_bytes(bookmark)
957 957 opts['bookmark'] = bookmark
958 958 if branch:
959 959 if isinstance(branch, list):
960 960 branch = [safe_bytes(x) for x in branch]
961 961 else:
962 962 branch = safe_bytes(branch)
963 963 opts['branch'] = branch
964 964 if revision:
965 965 opts['rev'] = safe_bytes(revision)
966 966
967 967 commands.pull(baseui, repo, source, **opts)
968 968
969 969 @reraise_safe_exceptions
970 970 def push(self, wire, revisions, dest_path, hooks=True, push_branches=False):
971 971 repo = self._factory.repo(wire)
972 972 baseui = self._factory._create_config(wire['config'], hooks=hooks)
973 973 commands.push(baseui, repo, dest=dest_path, rev=revisions,
974 974 new_branch=push_branches)
975 975
976 976 @reraise_safe_exceptions
977 977 def strip(self, wire, revision, update, backup):
978 978 repo = self._factory.repo(wire)
979 979 ctx = self._get_ctx(repo, revision)
980 980 hgext_strip(
981 981 repo.baseui, repo, ctx.node(), update=update, backup=backup)
982 982
983 983 @reraise_safe_exceptions
984 984 def get_unresolved_files(self, wire):
985 985 repo = self._factory.repo(wire)
986 986
987 987 log.debug('Calculating unresolved files for repo: %s', repo)
988 988 output = io.BytesIO()
989 989
990 990 def write(data, **unused_kwargs):
991 991 output.write(data)
992 992
993 993 baseui = self._factory._create_config(wire['config'])
994 994 baseui.write = write
995 995
996 996 commands.resolve(baseui, repo, list=True)
997 997 unresolved = output.getvalue().splitlines()
998 998 return unresolved
999 999
1000 1000 @reraise_safe_exceptions
1001 1001 def merge(self, wire, revision):
1002 1002 repo = self._factory.repo(wire)
1003 1003 baseui = self._factory._create_config(wire['config'])
1004 1004 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1005 1005
1006 1006 # When sub repositories are used, mercurial prompts the user in
1007 1007 # case of merge conflicts or different sub repository sources. By
1008 1008 # setting the interactive flag to `False` mercurial doesn't prompt the
1009 1009 # user but instead uses a default value.
1010 1010 repo.ui.setconfig(b'ui', b'interactive', False)
1011 1011 commands.merge(baseui, repo, rev=safe_bytes(revision))
1012 1012
1013 1013 @reraise_safe_exceptions
1014 1014 def merge_state(self, wire):
1015 1015 repo = self._factory.repo(wire)
1016 1016 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1017 1017
1018 1018 # When sub repositories are used, mercurial prompts the user in
1019 1019 # case of merge conflicts or different sub repository sources. By
1020 1020 # setting the interactive flag to `False` mercurial doesn't prompt the
1021 1021 # user but instead uses a default value.
1022 1022 repo.ui.setconfig(b'ui', b'interactive', False)
1023 1023 ms = hg_merge.mergestate(repo)
1024 1024 return [x for x in ms.unresolved()]
1025 1025
1026 1026 @reraise_safe_exceptions
1027 1027 def commit(self, wire, message, username, close_branch=False):
1028 1028 repo = self._factory.repo(wire)
1029 1029 baseui = self._factory._create_config(wire['config'])
1030 1030 repo.ui.setconfig(b'ui', b'username', safe_bytes(username))
1031 1031 commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch)
1032 1032
1033 1033 @reraise_safe_exceptions
1034 1034 def rebase(self, wire, source=None, dest=None, abort=False):
1035 1035 repo = self._factory.repo(wire)
1036 1036 baseui = self._factory._create_config(wire['config'])
1037 1037 repo.ui.setconfig(b'ui', b'merge', b'internal:dump')
1038 1038 # When sub repositories are used, mercurial prompts the user in
1039 1039 # case of merge conflicts or different sub repository sources. By
1040 1040 # setting the interactive flag to `False` mercurial doesn't prompt the
1041 1041 # user but instead uses a default value.
1042 1042 repo.ui.setconfig(b'ui', b'interactive', False)
1043 1043 rebase.rebase(baseui, repo, base=source, dest=dest, abort=abort, keep=not abort)
1044 1044
1045 1045 @reraise_safe_exceptions
1046 1046 def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone):
1047 1047 repo = self._factory.repo(wire)
1048 1048 ctx = self._get_ctx(repo, revision)
1049 1049 node = ctx.node()
1050 1050
1051 1051 date = (tag_time, tag_timezone)
1052 1052 try:
1053 1053 hg_tag.tag(repo, name, node, message, local, user, date)
1054 1054 except Abort as e:
1055 1055 log.exception("Tag operation aborted")
1056 1056 # Exception can contain unicode which we convert
1057 1057 raise exceptions.AbortException(e)(repr(e))
1058 1058
1059 1059 @reraise_safe_exceptions
1060 1060 def bookmark(self, wire, bookmark, revision=''):
1061 1061 repo = self._factory.repo(wire)
1062 1062 baseui = self._factory._create_config(wire['config'])
1063 1063 commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True)
1064 1064
1065 1065 @reraise_safe_exceptions
1066 1066 def install_hooks(self, wire, force=False):
1067 1067 # we don't need any special hooks for Mercurial
1068 1068 pass
1069 1069
1070 1070 @reraise_safe_exceptions
1071 1071 def get_hooks_info(self, wire):
1072 1072 return {
1073 1073 'pre_version': vcsserver.__version__,
1074 1074 'post_version': vcsserver.__version__,
1075 1075 }
1076 1076
1077 1077 @reraise_safe_exceptions
1078 1078 def set_head_ref(self, wire, head_name):
1079 1079 pass
1080 1080
1081 1081 @reraise_safe_exceptions
1082 1082 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
1083 1083 archive_dir_name, commit_id):
1084 1084
1085 1085 def file_walker(_commit_id, path):
1086 1086 repo = self._factory.repo(wire)
1087 1087 ctx = repo[_commit_id]
1088 1088 is_root = path in ['', '/']
1089 1089 if is_root:
1090 1090 matcher = alwaysmatcher(badfn=None)
1091 1091 else:
1092 1092 matcher = patternmatcher('', [(b'glob', safe_bytes(path) + b'/**', b'')], badfn=None)
1093 1093 file_iter = ctx.manifest().walk(matcher)
1094 1094
1095 1095 for fn in file_iter:
1096 1096 file_path = fn
1097 1097 flags = ctx.flags(fn)
1098 1098 mode = 0o755 if b'x' in flags else 0o644
1099 1099 is_link = b'l' in flags
1100 1100
1101 1101 yield ArchiveNode(file_path, mode, is_link, ctx[fn].data)
1102 1102
1103 1103 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
1104 1104 archive_dir_name, commit_id)
1105 1105
@@ -1,891 +1,890 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18
19 19 import os
20 20 import subprocess
21 21 from urllib.error import URLError
22 22 import urllib.parse
23 23 import logging
24 24 import posixpath as vcspath
25 25 import io
26 26 import urllib.request
27 27 import urllib.parse
28 28 import urllib.error
29 29 import traceback
30 30
31 31
32 32 import svn.client # noqa
33 33 import svn.core # noqa
34 34 import svn.delta # noqa
35 35 import svn.diff # noqa
36 36 import svn.fs # noqa
37 37 import svn.repos # noqa
38 38
39 39 from vcsserver import svn_diff, exceptions, subprocessio, settings
40 40 from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, archive_repo, BinaryEnvelope
41 41 from vcsserver.exceptions import NoContentException
42 42 from vcsserver.str_utils import safe_str, safe_bytes
43 43 from vcsserver.vcs_base import RemoteBase
44 44 from vcsserver.lib.svnremoterepo import svnremoterepo
45 45 log = logging.getLogger(__name__)
46 46
47 47
48 48 svn_compatible_versions_map = {
49 49 'pre-1.4-compatible': '1.3',
50 50 'pre-1.5-compatible': '1.4',
51 51 'pre-1.6-compatible': '1.5',
52 52 'pre-1.8-compatible': '1.7',
53 53 'pre-1.9-compatible': '1.8',
54 54 }
55 55
56 56 current_compatible_version = '1.14'
57 57
58 58
59 59 def reraise_safe_exceptions(func):
60 60 """Decorator for converting svn exceptions to something neutral."""
61 61 def wrapper(*args, **kwargs):
62 62 try:
63 63 return func(*args, **kwargs)
64 64 except Exception as e:
65 65 if not hasattr(e, '_vcs_kind'):
66 66 log.exception("Unhandled exception in svn remote call")
67 67 raise_from_original(exceptions.UnhandledException(e), e)
68 68 raise
69 69 return wrapper
70 70
71 71
72 72 class SubversionFactory(RepoFactory):
73 73 repo_type = 'svn'
74 74
75 75 def _create_repo(self, wire, create, compatible_version):
76 76 path = svn.core.svn_path_canonicalize(wire['path'])
77 77 if create:
78 78 fs_config = {'compatible-version': current_compatible_version}
79 79 if compatible_version:
80 80
81 81 compatible_version_string = \
82 82 svn_compatible_versions_map.get(compatible_version) \
83 83 or compatible_version
84 84 fs_config['compatible-version'] = compatible_version_string
85 85
86 86 log.debug('Create SVN repo with config `%s`', fs_config)
87 87 repo = svn.repos.create(path, "", "", None, fs_config)
88 88 else:
89 89 repo = svn.repos.open(path)
90 90
91 91 log.debug('repository created: got SVN object: %s', repo)
92 92 return repo
93 93
94 94 def repo(self, wire, create=False, compatible_version=None):
95 95 """
96 96 Get a repository instance for the given path.
97 97 """
98 98 return self._create_repo(wire, create, compatible_version)
99 99
100 100
101 101 NODE_TYPE_MAPPING = {
102 102 svn.core.svn_node_file: 'file',
103 103 svn.core.svn_node_dir: 'dir',
104 104 }
105 105
106 106
107 107 class SvnRemote(RemoteBase):
108 108
109 109 def __init__(self, factory, hg_factory=None):
110 110 self._factory = factory
111 111
112 112 @reraise_safe_exceptions
113 113 def discover_svn_version(self):
114 114 try:
115 115 import svn.core
116 116 svn_ver = svn.core.SVN_VERSION
117 117 except ImportError:
118 118 svn_ver = None
119 119 return safe_str(svn_ver)
120 120
121 121 @reraise_safe_exceptions
122 122 def is_empty(self, wire):
123 123
124 124 try:
125 125 return self.lookup(wire, -1) == 0
126 126 except Exception:
127 127 log.exception("failed to read object_store")
128 128 return False
129 129
130 130 def check_url(self, url):
131 131
132 132 # the uuid function gets a valid UUID only from a proper repo,
133 133 # otherwise it throws an exception
134 134 username, password, src_url = self.get_url_and_credentials(url)
135 135 try:
136 136 svnremoterepo(username, password, src_url).svn().uuid
137 137 except Exception:
138 138 tb = traceback.format_exc()
139 139 log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb)
140 140 raise URLError(
141 '"%s" is not a valid Subversion source url.' % (url, ))
141 '"{}" is not a valid Subversion source url.'.format(url))
142 142 return True
143 143
144 144 def is_path_valid_repository(self, wire, path):
145 145
146 146 # NOTE(marcink): short-circuit the check for an SVN repo
147 147 # repos.open might be expensive, but we have one cheap
148 148 # precondition we can use: checking for the 'format' file
149 149
150 150 if not os.path.isfile(os.path.join(path, 'format')):
151 151 return False
152 152
153 153 try:
154 154 svn.repos.open(path)
155 155 except svn.core.SubversionException:
156 156 tb = traceback.format_exc()
157 157 log.debug("Invalid Subversion path `%s`, tb: %s", path, tb)
158 158 return False
159 159 return True
160 160
161 161 @reraise_safe_exceptions
162 162 def verify(self, wire,):
163 163 repo_path = wire['path']
164 164 if not self.is_path_valid_repository(wire, repo_path):
165 165 raise Exception(
166 166 "Path %s is not a valid Subversion repository." % repo_path)
167 167
168 168 cmd = ['svnadmin', 'info', repo_path]
169 169 stdout, stderr = subprocessio.run_command(cmd)
170 170 return stdout
171 171
172 172 def lookup(self, wire, revision):
173 173 if revision not in [-1, None, 'HEAD']:
174 174 raise NotImplementedError
175 175 repo = self._factory.repo(wire)
176 176 fs_ptr = svn.repos.fs(repo)
177 177 head = svn.fs.youngest_rev(fs_ptr)
178 178 return head
179 179
180 180 def lookup_interval(self, wire, start_ts, end_ts):
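"""
Map a (start_ts, end_ts) timestamp interval onto the corresponding
inclusive (start_rev, end_rev) revision range.
"""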
181 181 repo = self._factory.repo(wire)
182 182 fsobj = svn.repos.fs(repo)
183 183 start_rev = None
184 184 end_rev = None
185 185 if start_ts:
186 186 start_ts_svn = apr_time_t(start_ts)
187 187 start_rev = svn.repos.dated_revision(repo, start_ts_svn) + 1
188 188 else:
189 189 start_rev = 1
190 190 if end_ts:
191 191 end_ts_svn = apr_time_t(end_ts)
192 192 end_rev = svn.repos.dated_revision(repo, end_ts_svn)
193 193 else:
194 194 end_rev = svn.fs.youngest_rev(fsobj)
195 195 return start_rev, end_rev
196 196
197 197 def revision_properties(self, wire, revision):
198 198
199 199 cache_on, context_uid, repo_id = self._cache_on(wire)
200 200 region = self._region(wire)
201 201 @region.conditional_cache_on_arguments(condition=cache_on)
202 202 def _revision_properties(_repo_id, _revision):
203 203 repo = self._factory.repo(wire)
204 204 fs_ptr = svn.repos.fs(repo)
205 205 return svn.fs.revision_proplist(fs_ptr, revision)
206 206 return _revision_properties(repo_id, revision)
207 207
208 208 def revision_changes(self, wire, revision):
209 209
210 210 repo = self._factory.repo(wire)
211 211 fsobj = svn.repos.fs(repo)
212 212 rev_root = svn.fs.revision_root(fsobj, revision)
213 213
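# replay the revision into a ChangeCollector editor to gather the
# full set of paths changed in this revision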
214 214 editor = svn.repos.ChangeCollector(fsobj, rev_root)
215 215 editor_ptr, editor_baton = svn.delta.make_editor(editor)
216 216 base_dir = ""
217 217 send_deltas = False
218 218 svn.repos.replay2(
219 219 rev_root, base_dir, svn.core.SVN_INVALID_REVNUM, send_deltas,
220 220 editor_ptr, editor_baton, None)
221 221
222 222 added = []
223 223 changed = []
224 224 removed = []
225 225
226 226 # TODO: CHANGE_ACTION_REPLACE: Figure out where it belongs
227 227 for path, change in editor.changes.items():
228 228 # TODO: Decide what to do with directory nodes. Subversion can add
229 229 # empty directories.
230 230
231 231 if change.item_kind == svn.core.svn_node_dir:
232 232 continue
233 233 if change.action in [svn.repos.CHANGE_ACTION_ADD]:
234 234 added.append(path)
235 235 elif change.action in [svn.repos.CHANGE_ACTION_MODIFY,
236 236 svn.repos.CHANGE_ACTION_REPLACE]:
237 237 changed.append(path)
238 238 elif change.action in [svn.repos.CHANGE_ACTION_DELETE]:
239 239 removed.append(path)
240 240 else:
241 241 raise NotImplementedError(
242 "Action %s not supported on path %s" % (
242 "Action {} not supported on path {}".format(
243 243 change.action, path))
244 244
245 245 changes = {
246 246 'added': added,
247 247 'changed': changed,
248 248 'removed': removed,
249 249 }
250 250 return changes
251 251
252 252 @reraise_safe_exceptions
253 253 def node_history(self, wire, path, revision, limit):
254 254 cache_on, context_uid, repo_id = self._cache_on(wire)
255 255 region = self._region(wire)
256 256 @region.conditional_cache_on_arguments(condition=cache_on)
257 257 def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit):
258 258 cross_copies = False
259 259 repo = self._factory.repo(wire)
260 260 fsobj = svn.repos.fs(repo)
261 261 rev_root = svn.fs.revision_root(fsobj, revision)
262 262
263 263 history_revisions = []
264 264 history = svn.fs.node_history(rev_root, path)
265 265 history = svn.fs.history_prev(history, cross_copies)
266 266 while history:
267 267 __, node_revision = svn.fs.history_location(history)
268 268 history_revisions.append(node_revision)
269 269 if limit and len(history_revisions) >= limit:
270 270 break
271 271 history = svn.fs.history_prev(history, cross_copies)
272 272 return history_revisions
273 273 return _assert_correct_path(context_uid, repo_id, path, revision, limit)
274 274
275 275 def node_properties(self, wire, path, revision):
276 276 cache_on, context_uid, repo_id = self._cache_on(wire)
277 277 region = self._region(wire)
278 278
279 279 @region.conditional_cache_on_arguments(condition=cache_on)
280 280 def _node_properties(_repo_id, _path, _revision):
281 281 repo = self._factory.repo(wire)
282 282 fsobj = svn.repos.fs(repo)
283 283 rev_root = svn.fs.revision_root(fsobj, revision)
284 284 return svn.fs.node_proplist(rev_root, path)
285 285 return _node_properties(repo_id, path, revision)
286 286
287 287 def file_annotate(self, wire, path, revision):
288 288 abs_path = 'file://' + urllib.request.pathname2url(
289 289 vcspath.join(wire['path'], path))
290 290 file_uri = svn.core.svn_path_canonicalize(abs_path)
291 291
292 292 start_rev = svn_opt_revision_value_t(0)
293 293 peg_rev = svn_opt_revision_value_t(revision)
294 294 end_rev = peg_rev
295 295
296 296 annotations = []
297 297
298 298 def receiver(line_no, revision, author, date, line, pool):
299 299 annotations.append((line_no, revision, line))
300 300
301 301 # TODO: Cannot use blame5, missing typemap function in the swig code
302 302 try:
303 303 svn.client.blame2(
304 304 file_uri, peg_rev, start_rev, end_rev,
305 305 receiver, svn.client.create_context())
306 306 except svn.core.SubversionException as exc:
307 307 log.exception("Error during blame operation.")
308 308 raise Exception(
309 309 "Blame not supported or file does not exist at path %s. "
310 310 "Error %s." % (path, exc))
311 311
312 312 return annotations
313 313
314 314 def get_node_type(self, wire, path, revision=None):
315 315
316 316 cache_on, context_uid, repo_id = self._cache_on(wire)
317 317 region = self._region(wire)
318 318
319 319 @region.conditional_cache_on_arguments(condition=cache_on)
320 320 def _get_node_type(_repo_id, _path, _revision):
321 321 repo = self._factory.repo(wire)
322 322 fs_ptr = svn.repos.fs(repo)
323 323 if _revision is None:
324 324 _revision = svn.fs.youngest_rev(fs_ptr)
325 325 root = svn.fs.revision_root(fs_ptr, _revision)
326 326 node = svn.fs.check_path(root, path)
327 327 return NODE_TYPE_MAPPING.get(node, None)
328 328 return _get_node_type(repo_id, path, revision)
329 329
330 330 def get_nodes(self, wire, path, revision=None):
331 331
332 332 cache_on, context_uid, repo_id = self._cache_on(wire)
333 333 region = self._region(wire)
334 334
335 335 @region.conditional_cache_on_arguments(condition=cache_on)
336 336 def _get_nodes(_repo_id, _path, _revision):
337 337 repo = self._factory.repo(wire)
338 338 fsobj = svn.repos.fs(repo)
339 339 if _revision is None:
340 340 _revision = svn.fs.youngest_rev(fsobj)
341 341 root = svn.fs.revision_root(fsobj, _revision)
342 342 entries = svn.fs.dir_entries(root, path)
343 343 result = []
344 344 for entry_path, entry_info in entries.items():
345 345 result.append(
346 346 (entry_path, NODE_TYPE_MAPPING.get(entry_info.kind, None)))
347 347 return result
348 348 return _get_nodes(repo_id, path, revision)
349 349
350 350 def get_file_content(self, wire, path, rev=None):
351 351 repo = self._factory.repo(wire)
352 352 fsobj = svn.repos.fs(repo)
353 353 if rev is None:
354 354 rev = svn.fs.youngest_revision(fsobj)
355 355 root = svn.fs.revision_root(fsobj, rev)
356 356 content = svn.core.Stream(svn.fs.file_contents(root, path))
357 357 return BinaryEnvelope(content.read())
358 358
359 359 def get_file_size(self, wire, path, revision=None):
360 360
361 361 cache_on, context_uid, repo_id = self._cache_on(wire)
362 362 region = self._region(wire)
363 363
364 364 @region.conditional_cache_on_arguments(condition=cache_on)
365 365 def _get_file_size(_repo_id, _path, _revision):
366 366 repo = self._factory.repo(wire)
367 367 fsobj = svn.repos.fs(repo)
368 368 if _revision is None:
369 369 _revision = svn.fs.youngest_revision(fsobj)
370 370 root = svn.fs.revision_root(fsobj, _revision)
371 371 size = svn.fs.file_length(root, path)
372 372 return size
373 373 return _get_file_size(repo_id, path, revision)
374 374
375 375 def create_repository(self, wire, compatible_version=None):
376 376 log.info('Creating Subversion repository in path "%s"', wire['path'])
377 377 self._factory.repo(wire, create=True,
378 378 compatible_version=compatible_version)
379 379
380 380 def get_url_and_credentials(self, src_url):
381 381 obj = urllib.parse.urlparse(src_url)
382 382 username = obj.username or None
383 383 password = obj.password or None
384 384 return username, password, src_url
385 385
386 386 def import_remote_repository(self, wire, src_url):
387 387 repo_path = wire['path']
388 388 if not self.is_path_valid_repository(wire, repo_path):
389 389 raise Exception(
390 390 "Path %s is not a valid Subversion repository." % repo_path)
391 391
392 392 username, password, src_url = self.get_url_and_credentials(src_url)
393 393 rdump_cmd = ['svnrdump', 'dump', '--non-interactive',
394 394 '--trust-server-cert-failures=unknown-ca']
395 395 if username and password:
396 396 rdump_cmd += ['--username', username, '--password', password]
397 397 rdump_cmd += [src_url]
398 398
399 399 rdump = subprocess.Popen(
400 400 rdump_cmd,
401 401 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
402 402 load = subprocess.Popen(
403 403 ['svnadmin', 'load', repo_path], stdin=rdump.stdout)
404 404
405 405 # TODO: johbo: This can be a very long operation, might be better
406 406 # to track some kind of status and provide an api to check if the
407 407 # import is done.
408 408 rdump.wait()
409 409 load.wait()
410 410
411 411 log.debug('Return process ended with code: %s', rdump.returncode)
412 412 if rdump.returncode != 0:
413 413 errors = rdump.stderr.read()
414 414 log.error('svnrdump dump failed: statuscode %s: message: %s', rdump.returncode, errors)
415 415
416 416 reason = 'UNKNOWN'
417 417 if b'svnrdump: E230001:' in errors:
418 418 reason = 'INVALID_CERTIFICATE'
419 419
420 420 if reason == 'UNKNOWN':
421 reason = 'UNKNOWN:{}'.format(safe_str(errors))
421 reason = f'UNKNOWN:{safe_str(errors)}'
422 422
423 423 raise Exception(
424 'Failed to dump the remote repository from %s. Reason:%s' % (
424 'Failed to dump the remote repository from {}. Reason:{}'.format(
425 425 src_url, reason))
426 426 if load.returncode != 0:
427 427 raise Exception(
428 428 'Failed to load the dump of remote repository from %s.' %
429 429 (src_url, ))
430 430
431 431 def commit(self, wire, message, author, timestamp, updated, removed):
432 432
433 433 updated = [{k: safe_bytes(v) for k, v in x.items() if isinstance(v, str)} for x in updated]
434 434
435 435 message = safe_bytes(message)
436 436 author = safe_bytes(author)
437 437
438 438 repo = self._factory.repo(wire)
439 439 fsobj = svn.repos.fs(repo)
440 440
441 441 rev = svn.fs.youngest_rev(fsobj)
442 442 txn = svn.repos.fs_begin_txn_for_commit(repo, rev, author, message)
443 443 txn_root = svn.fs.txn_root(txn)
444 444
445 445 for node in updated:
446 446 TxnNodeProcessor(node, txn_root).update()
447 447 for node in removed:
448 448 TxnNodeProcessor(node, txn_root).remove()
449 449
450 450 commit_id = svn.repos.fs_commit_txn(repo, txn)
451 451
452 452 if timestamp:
453 453 apr_time = int(apr_time_t(timestamp))
454 454 ts_formatted = svn.core.svn_time_to_cstring(apr_time)
455 455 svn.fs.change_rev_prop(fsobj, commit_id, 'svn:date', ts_formatted)
456 456
457 457 log.debug('Committed revision "%s" to "%s".', commit_id, wire['path'])
458 458 return commit_id
459 459
460 460 def diff(self, wire, rev1, rev2, path1=None, path2=None,
461 461 ignore_whitespace=False, context=3):
462 462
463 463 wire.update(cache=False)
464 464 repo = self._factory.repo(wire)
465 465 diff_creator = SvnDiffer(
466 466 repo, rev1, path1, rev2, path2, ignore_whitespace, context)
467 467 try:
468 468 return BinaryEnvelope(diff_creator.generate_diff())
469 469 except svn.core.SubversionException as e:
470 470 log.exception(
471 471 "Error during diff operation operation. "
472 472 "Path might not exist %s, %s", path1, path2)
473 473 return BinaryEnvelope(b'')
474 474
475 475 @reraise_safe_exceptions
476 476 def is_large_file(self, wire, path):
477 477 return False
478 478
479 479 @reraise_safe_exceptions
480 480 def is_binary(self, wire, rev, path):
481 481 cache_on, context_uid, repo_id = self._cache_on(wire)
482 482 region = self._region(wire)
483 483
484 484 @region.conditional_cache_on_arguments(condition=cache_on)
485 485 def _is_binary(_repo_id, _rev, _path):
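# heuristic: content containing a NUL byte is treated as binary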
486 486 raw_bytes = self.get_file_content(wire, path, rev)
487 487 return raw_bytes and b'\0' in raw_bytes
488 488
489 489 return _is_binary(repo_id, rev, path)
490 490
491 491 @reraise_safe_exceptions
492 492 def md5_hash(self, wire, rev, path):
493 493 cache_on, context_uid, repo_id = self._cache_on(wire)
494 494 region = self._region(wire)
495 495
496 496 @region.conditional_cache_on_arguments(condition=cache_on)
497 497 def _md5_hash(_repo_id, _rev, _path):
498 498 return ''
499 499
500 500 return _md5_hash(repo_id, rev, path)
501 501
502 502 @reraise_safe_exceptions
503 503 def run_svn_command(self, wire, cmd, **opts):
504 504 path = wire.get('path', None)
505 505
506 506 if path and os.path.isdir(path):
507 507 opts['cwd'] = path
508 508
509 509 safe_call = opts.pop('_safe', False)
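# with `_safe` set, a failure to spawn the command is returned to the
# caller as ('', error) instead of raising a VcsException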
510 510
511 511 svnenv = os.environ.copy()
512 512 svnenv.update(opts.pop('extra_env', {}))
513 513
514 514 _opts = {'env': svnenv, 'shell': False}
515 515
516 516 try:
517 517 _opts.update(opts)
518 518 proc = subprocessio.SubprocessIOChunker(cmd, **_opts)
519 519
520 520 return b''.join(proc), b''.join(proc.stderr)
521 521 except OSError as err:
522 522 if safe_call:
523 523 return '', safe_str(err).strip()
524 524 else:
525 525 cmd = ' '.join(map(safe_str, cmd)) # human friendly CMD
526 526 tb_err = ("Couldn't run svn command (%s).\n"
527 527 "Original error was:%s\n"
528 528 "Call options:%s\n"
529 529 % (cmd, err, _opts))
530 530 log.exception(tb_err)
531 531 raise exceptions.VcsException()(tb_err)
532 532
533 533 @reraise_safe_exceptions
534 534 def install_hooks(self, wire, force=False):
535 535 from vcsserver.hook_utils import install_svn_hooks
536 536 repo_path = wire['path']
537 537 binary_dir = settings.BINARY_DIR
538 538 executable = None
539 539 if binary_dir:
540 540 executable = os.path.join(binary_dir, 'python3')
541 541 return install_svn_hooks(repo_path, force_create=force)
542 542
543 543 @reraise_safe_exceptions
544 544 def get_hooks_info(self, wire):
545 545 from vcsserver.hook_utils import (
546 546 get_svn_pre_hook_version, get_svn_post_hook_version)
547 547 repo_path = wire['path']
548 548 return {
549 549 'pre_version': get_svn_pre_hook_version(repo_path),
550 550 'post_version': get_svn_post_hook_version(repo_path),
551 551 }
552 552
553 553 @reraise_safe_exceptions
554 554 def set_head_ref(self, wire, head_name):
555 555 pass
556 556
557 557 @reraise_safe_exceptions
558 558 def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path,
559 559 archive_dir_name, commit_id):
560 560
561 561 def walk_tree(root, root_dir, _commit_id):
562 562 """
563 563 Special recursive svn repo walker
564 564 """
565 565 root_dir = safe_bytes(root_dir)
566 566
567 567 filemode_default = 0o100644
568 568 filemode_executable = 0o100755
569 569
570 570 file_iter = svn.fs.dir_entries(root, root_dir)
571 571 for f_name in file_iter:
572 572 f_type = NODE_TYPE_MAPPING.get(file_iter[f_name].kind, None)
573 573
574 574 if f_type == 'dir':
575 575 # return only DIR, and then all entries in that dir
576 576 yield os.path.join(root_dir, f_name), {'mode': filemode_default}, f_type
577 577 new_root = os.path.join(root_dir, f_name)
578 for _f_name, _f_data, _f_type in walk_tree(root, new_root, _commit_id):
579 yield _f_name, _f_data, _f_type
578 yield from walk_tree(root, new_root, _commit_id)
580 579 else:
581 580
582 581 f_path = os.path.join(root_dir, f_name).rstrip(b'/')
583 582 prop_list = svn.fs.node_proplist(root, f_path)
584 583
585 584 f_mode = filemode_default
586 585 if prop_list.get('svn:executable'):
587 586 f_mode = filemode_executable
588 587
589 588 f_is_link = False
590 589 if prop_list.get('svn:special'):
591 590 f_is_link = True
592 591
593 592 data = {
594 593 'is_link': f_is_link,
595 594 'mode': f_mode,
596 595 'content_stream': svn.core.Stream(svn.fs.file_contents(root, f_path)).read
597 596 }
598 597
599 598 yield f_path, data, f_type
600 599
601 600 def file_walker(_commit_id, path):
602 601 repo = self._factory.repo(wire)
603 602 root = svn.fs.revision_root(svn.repos.fs(repo), int(commit_id))
604 603
605 604 def no_content():
606 605 raise NoContentException()
607 606
608 607 for f_name, f_data, f_type in walk_tree(root, path, _commit_id):
609 608 file_path = f_name
610 609
611 610 if f_type == 'dir':
612 611 mode = f_data['mode']
613 612 yield ArchiveNode(file_path, mode, False, no_content)
614 613 else:
615 614 mode = f_data['mode']
616 615 is_link = f_data['is_link']
617 616 data_stream = f_data['content_stream']
618 617 yield ArchiveNode(file_path, mode, is_link, data_stream)
619 618
620 619 return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path,
621 620 archive_dir_name, commit_id)
622 621
623 622
624 623 class SvnDiffer(object):
625 624 """
626 625 Utility to create diffs based on difflib and the Subversion api
627 626 """
628 627
629 628 binary_content = False
630 629
631 630 def __init__(
632 631 self, repo, src_rev, src_path, tgt_rev, tgt_path,
633 632 ignore_whitespace, context):
634 633 self.repo = repo
635 634 self.ignore_whitespace = ignore_whitespace
636 635 self.context = context
637 636
638 637 fsobj = svn.repos.fs(repo)
639 638
640 639 self.tgt_rev = tgt_rev
641 640 self.tgt_path = tgt_path or ''
642 641 self.tgt_root = svn.fs.revision_root(fsobj, tgt_rev)
643 642 self.tgt_kind = svn.fs.check_path(self.tgt_root, self.tgt_path)
644 643
645 644 self.src_rev = src_rev
646 645 self.src_path = src_path or self.tgt_path
647 646 self.src_root = svn.fs.revision_root(fsobj, src_rev)
648 647 self.src_kind = svn.fs.check_path(self.src_root, self.src_path)
649 648
650 649 self._validate()
651 650
652 651 def _validate(self):
653 652 if (self.tgt_kind != svn.core.svn_node_none and
654 653 self.src_kind != svn.core.svn_node_none and
655 654 self.src_kind != self.tgt_kind):
656 655 # TODO: johbo: proper error handling
657 656 raise Exception(
658 657 "Source and target are not compatible for diff generation. "
659 658 "Source type: %s, target type: %s" %
660 659 (self.src_kind, self.tgt_kind))
661 660
662 661 def generate_diff(self):
663 662 buf = io.StringIO()
664 663 if self.tgt_kind == svn.core.svn_node_dir:
665 664 self._generate_dir_diff(buf)
666 665 else:
667 666 self._generate_file_diff(buf)
668 667 return buf.getvalue()
669 668
670 669 def _generate_dir_diff(self, buf):
671 670 editor = DiffChangeEditor()
672 671 editor_ptr, editor_baton = svn.delta.make_editor(editor)
673 672 svn.repos.dir_delta2(
674 673 self.src_root,
675 674 self.src_path,
676 675 '', # src_entry
677 676 self.tgt_root,
678 677 self.tgt_path,
679 678 editor_ptr, editor_baton,
680 679 authorization_callback_allow_all,
681 680 False, # text_deltas
682 681 svn.core.svn_depth_infinity, # depth
683 682 False, # entry_props
684 683 False, # ignore_ancestry
685 684 )
686 685
687 686 for path, __, change in sorted(editor.changes):
688 687 self._generate_node_diff(
689 688 buf, change, path, self.tgt_path, path, self.src_path)
690 689
691 690 def _generate_file_diff(self, buf):
692 691 change = None
693 692 if self.src_kind == svn.core.svn_node_none:
694 693 change = "add"
695 694 elif self.tgt_kind == svn.core.svn_node_none:
696 695 change = "delete"
697 696 tgt_base, tgt_path = vcspath.split(self.tgt_path)
698 697 src_base, src_path = vcspath.split(self.src_path)
699 698 self._generate_node_diff(
700 699 buf, change, tgt_path, tgt_base, src_path, src_base)
701 700
702 701 def _generate_node_diff(
703 702 self, buf, change, tgt_path, tgt_base, src_path, src_base):
704 703
705 704
706 705 tgt_path = safe_str(tgt_path)
707 706 src_path = safe_str(src_path)
708 707
709 708
710 709 if self.src_rev == self.tgt_rev and tgt_base == src_base:
711 710 # make behaviour consistent with git/hg: return an empty diff if
712 711 # we compare the same revisions
713 712 return
714 713
715 714 tgt_full_path = vcspath.join(tgt_base, tgt_path)
716 715 src_full_path = vcspath.join(src_base, src_path)
717 716
718 717 self.binary_content = False
719 718 mime_type = self._get_mime_type(tgt_full_path)
720 719
721 720 if mime_type and not mime_type.startswith('text'):
722 721 self.binary_content = True
723 722 buf.write("=" * 67 + '\n')
724 723 buf.write("Cannot display: file marked as a binary type.\n")
725 724 buf.write("svn:mime-type = %s\n" % mime_type)
726 buf.write("Index: %s\n" % (tgt_path, ))
725 buf.write("Index: {}\n".format(tgt_path))
727 726 buf.write("=" * 67 + '\n')
728 buf.write("diff --git a/%(tgt_path)s b/%(tgt_path)s\n" % {
729 'tgt_path': tgt_path})
727 buf.write("diff --git a/{tgt_path} b/{tgt_path}\n".format(
728 tgt_path=tgt_path))
730 729
731 730 if change == 'add':
732 731 # TODO: johbo: SVN is missing a zero here compared to git
733 732 buf.write("new file mode 10644\n")
734 733
735 734 #TODO(marcink): intro to binary detection of svn patches
736 735 # if self.binary_content:
737 736 # buf.write('GIT binary patch\n')
738 737
739 738 buf.write("--- /dev/null\t(revision 0)\n")
740 739 src_lines = []
741 740 else:
742 741 if change == 'delete':
743 742 buf.write("deleted file mode 10644\n")
744 743
745 744 #TODO(marcink): intro to binary detection of svn patches
746 745 # if self.binary_content:
747 746 # buf.write('GIT binary patch\n')
748 747
749 buf.write("--- a/%s\t(revision %s)\n" % (
748 buf.write("--- a/{}\t(revision {})\n".format(
750 749 src_path, self.src_rev))
751 750 src_lines = self._svn_readlines(self.src_root, src_full_path)
752 751
753 752 if change == 'delete':
754 buf.write("+++ /dev/null\t(revision %s)\n" % (self.tgt_rev, ))
753 buf.write("+++ /dev/null\t(revision {})\n".format(self.tgt_rev))
755 754 tgt_lines = []
756 755 else:
757 buf.write("+++ b/%s\t(revision %s)\n" % (
756 buf.write("+++ b/{}\t(revision {})\n".format(
758 757 tgt_path, self.tgt_rev))
759 758 tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path)
760 759
761 760 if not self.binary_content:
762 761 udiff = svn_diff.unified_diff(
763 762 src_lines, tgt_lines, context=self.context,
764 763 ignore_blank_lines=self.ignore_whitespace,
765 764 ignore_case=False,
766 765 ignore_space_changes=self.ignore_whitespace)
767 766
768 767 buf.writelines(udiff)
769 768
770 769 def _get_mime_type(self, path):
771 770 try:
772 771 mime_type = svn.fs.node_prop(
773 772 self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE)
774 773 except svn.core.SubversionException:
775 774 mime_type = svn.fs.node_prop(
776 775 self.src_root, path, svn.core.SVN_PROP_MIME_TYPE)
777 776 return mime_type
778 777
779 778 def _svn_readlines(self, fs_root, node_path):
780 779 if self.binary_content:
781 780 return []
782 781 node_kind = svn.fs.check_path(fs_root, node_path)
783 782 if node_kind not in (
784 783 svn.core.svn_node_file, svn.core.svn_node_symlink):
785 784 return []
786 785 content = svn.core.Stream(
787 786 svn.fs.file_contents(fs_root, node_path)).read()
788 787
789 788 return content.splitlines(True)
790 789
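A usage sketch for the differ above; the repository path, file path and revisions are hypothetical, and `svn.repos.open` is assumed to be the Subversion binding for opening a repository handle:

import svn.repos

repo = svn.repos.open('/srv/svn/myrepo')            # hypothetical path
differ = SvnDiffer(
    repo, src_rev=41, src_path='trunk/readme.txt',
    tgt_rev=42, tgt_path='trunk/readme.txt',
    ignore_whitespace=False, context=3)
print(differ.generate_diff())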
791 790
792 791 class DiffChangeEditor(svn.delta.Editor):
793 792 """
794 793 Records changes between two given revisions
795 794 """
796 795
797 796 def __init__(self):
798 797 self.changes = []
799 798
800 799 def delete_entry(self, path, revision, parent_baton, pool=None):
801 800 self.changes.append((path, None, 'delete'))
802 801
803 802 def add_file(
804 803 self, path, parent_baton, copyfrom_path, copyfrom_revision,
805 804 file_pool=None):
806 805 self.changes.append((path, 'file', 'add'))
807 806
808 807 def open_file(self, path, parent_baton, base_revision, file_pool=None):
809 808 self.changes.append((path, 'file', 'change'))
810 809
811 810
812 811 def authorization_callback_allow_all(root, path, pool):
813 812 return True
814 813
815 814
816 815 class TxnNodeProcessor(object):
817 816 """
818 817 Utility to process the change of one node within a transaction root.
819 818
820 819 It encapsulates the knowledge of how to add, update or remove
821 820 a node for a given transaction root. The purpose is to support the method
822 821 `SvnRemote.commit`.
823 822 """
824 823
825 824 def __init__(self, node, txn_root):
826 825 assert isinstance(node['path'], bytes)
827 826
828 827 self.node = node
829 828 self.txn_root = txn_root
830 829
831 830 def update(self):
832 831 self._ensure_parent_dirs()
833 832 self._add_file_if_node_does_not_exist()
834 833 self._update_file_content()
835 834 self._update_file_properties()
836 835
837 836 def remove(self):
838 837 svn.fs.delete(self.txn_root, self.node['path'])
839 838 # TODO: Clean up directory if empty
840 839
841 840 def _ensure_parent_dirs(self):
842 841 curdir = vcspath.dirname(self.node['path'])
843 842 dirs_to_create = []
844 843 while not self._svn_path_exists(curdir):
845 844 dirs_to_create.append(curdir)
846 845 curdir = vcspath.dirname(curdir)
847 846
848 847 for curdir in reversed(dirs_to_create):
849 848 log.debug('Creating missing directory "%s"', curdir)
850 849 svn.fs.make_dir(self.txn_root, curdir)
851 850
852 851 def _svn_path_exists(self, path):
853 852 path_status = svn.fs.check_path(self.txn_root, path)
854 853 return path_status != svn.core.svn_node_none
855 854
856 855 def _add_file_if_node_does_not_exist(self):
857 856 kind = svn.fs.check_path(self.txn_root, self.node['path'])
858 857 if kind == svn.core.svn_node_none:
859 858 svn.fs.make_file(self.txn_root, self.node['path'])
860 859
861 860 def _update_file_content(self):
862 861 assert isinstance(self.node['content'], bytes)
863 862
864 863 handler, baton = svn.fs.apply_textdelta(
865 864 self.txn_root, self.node['path'], None, None)
866 865 svn.delta.svn_txdelta_send_string(self.node['content'], handler, baton)
867 866
868 867 def _update_file_properties(self):
869 868 properties = self.node.get('properties', {})
870 869 for key, value in properties.items():
871 870 svn.fs.change_node_prop(
872 871 self.txn_root, self.node['path'], key, value)
873 872
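A minimal sketch of the node dict this processor expects; `txn_root` stands for an already-opened Subversion transaction root obtained elsewhere, and the path/content are hypothetical. Note that both 'path' and 'content' must be bytes, as the asserts above enforce:

node = {
    'path': b'docs/readme.txt',                      # must be bytes
    'content': b'hello\n',                           # must be bytes
    'properties': {'svn:mime-type': 'text/plain'},   # optional
}
processor = TxnNodeProcessor(node, txn_root)
processor.update()   # creates parent dirs, adds the file, writes content/props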
874 873
875 874 def apr_time_t(timestamp):
876 875 """
877 876 Convert a Python timestamp into APR timestamp type apr_time_t
878 877 """
879 878 return timestamp * 1E6
880 879
881 880
882 881 def svn_opt_revision_value_t(num):
883 882 """
884 883 Put `num` into a `svn_opt_revision_value_t` structure.
885 884 """
886 885 value = svn.core.svn_opt_revision_value_t()
887 886 value.number = num
888 887 revision = svn.core.svn_opt_revision_t()
889 888 revision.kind = svn.core.svn_opt_revision_number
890 889 revision.value = value
891 890 return revision
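
The two helpers are typically used together when driving the Subversion client/fs APIs; a short runnable sketch (the revision number is arbitrary):

import time

commit_time = apr_time_t(time.time())   # APR counts time in microseconds
revision = svn_opt_revision_value_t(42)
assert revision.value.number == 42
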
@@ -1,242 +1,242 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import os
19 19 import logging
20 20 import itertools
21 21
22 22 import mercurial
23 23 import mercurial.error
24 24 import mercurial.wireprotoserver
25 25 import mercurial.hgweb.common
26 26 import mercurial.hgweb.hgweb_mod
27 27 import webob.exc
28 28
29 29 from vcsserver import pygrack, exceptions, settings, git_lfs
30 30 from vcsserver.str_utils import ascii_bytes, safe_bytes
31 31
32 32 log = logging.getLogger(__name__)
33 33
34 34
35 35 # propagated from mercurial documentation
36 36 HG_UI_SECTIONS = [
37 37 'alias', 'auth', 'decode/encode', 'defaults', 'diff', 'email', 'extensions',
38 38 'format', 'merge-patterns', 'merge-tools', 'hooks', 'http_proxy', 'smtp',
39 39 'patch', 'paths', 'profiling', 'server', 'trusted', 'ui', 'web',
40 40 ]
41 41
42 42
43 43 class HgWeb(mercurial.hgweb.hgweb_mod.hgweb):
44 44 """Extension of hgweb that simplifies some functions."""
45 45
46 46 def _get_view(self, repo):
47 47 """Views are not supported."""
48 48 return repo
49 49
50 50 def loadsubweb(self):
51 51 """The result is only used in the templater method which is not used."""
52 52 return None
53 53
54 54 def run(self):
55 55 """Unused function so raise an exception if accidentally called."""
56 56 raise NotImplementedError
57 57
58 58 def templater(self, req):
59 59 """Function used in an unreachable code path.
60 60
61 61 This code is unreachable because we guarantee that the HTTP request
62 62 corresponds to a Mercurial command. See the is_hg method. So we are
63 63 never going to get a user-visible URL.
64 64 """
65 65 raise NotImplementedError
66 66
67 67 def archivelist(self, nodeid):
68 68 """Unused function so raise an exception if accidentally called."""
69 69 raise NotImplementedError
70 70
71 71 def __call__(self, environ, start_response):
72 72 """Run the WSGI application.
73 73
74 74 This may be called by multiple threads.
75 75 """
76 76 from mercurial.hgweb import request as requestmod
77 77 req = requestmod.parserequestfromenv(environ)
78 78 res = requestmod.wsgiresponse(req, start_response)
79 79 gen = self.run_wsgi(req, res)
80 80
81 81 first_chunk = None
82 82
83 83 try:
84 84 data = next(gen)
85 85
86 86 def first_chunk():
87 87 yield data
88 88 except StopIteration:
89 89 pass
90 90
91 91 if first_chunk:
92 92 return itertools.chain(first_chunk(), gen)
93 93 return gen
94 94
95 95 def _runwsgi(self, req, res, repo):
96 96
97 97 cmd = req.qsparams.get(b'cmd', '')
98 98 if not mercurial.wireprotoserver.iscmd(cmd):
99 99 # NOTE(marcink): for unsupported commands, we return bad request
100 100 # internally from HG
101 101 log.warning('cmd: `%s` is not supported by the mercurial wireprotocol v1', cmd)
102 102 from mercurial.hgweb.common import statusmessage
103 103 res.status = statusmessage(mercurial.hgweb.common.HTTP_BAD_REQUEST)
104 104 res.setbodybytes(b'')
105 105 return res.sendresponse()
106 106
107 return super(HgWeb, self)._runwsgi(req, res, repo)
107 return super()._runwsgi(req, res, repo)
108 108
109 109
110 110 def make_hg_ui_from_config(repo_config):
111 111 baseui = mercurial.ui.ui()
112 112
113 113 # clean the baseui object
114 114 baseui._ocfg = mercurial.config.config()
115 115 baseui._ucfg = mercurial.config.config()
116 116 baseui._tcfg = mercurial.config.config()
117 117
118 118 for section, option, value in repo_config:
119 119 baseui.setconfig(
120 120 ascii_bytes(section, allow_bytes=True),
121 121 ascii_bytes(option, allow_bytes=True),
122 122 ascii_bytes(value, allow_bytes=True))
123 123
124 124 # make our hgweb quiet so it doesn't print output
125 125 baseui.setconfig(b'ui', b'quiet', b'true')
126 126
127 127 return baseui
128 128
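The `repo_config` argument is the serialized ConfigObject described below in `create_hg_wsgi_app`: a flat list of (section, option, value) triples. A hedged sketch with made-up values:

config = [
    ('ui', 'username', 'RhodeCode <noreply@example.com>'),   # hypothetical
    ('extensions', 'largefiles', ''),
    ('phases', 'publish', 'True'),
]
baseui = make_hg_ui_from_config(config)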
129 129
130 130 def update_hg_ui_from_hgrc(baseui, repo_path):
131 131 path = os.path.join(repo_path, '.hg', 'hgrc')
132 132
133 133 if not os.path.isfile(path):
134 134 log.debug('hgrc file is not present at %s, skipping...', path)
135 135 return
136 136 log.debug('reading hgrc from %s', path)
137 137 cfg = mercurial.config.config()
138 138 cfg.read(ascii_bytes(path))
139 139 for section in HG_UI_SECTIONS:
140 140 for k, v in cfg.items(section):
141 141 log.debug('settings ui from file: [%s] %s=%s', section, k, v)
142 142 baseui.setconfig(
143 143 ascii_bytes(section, allow_bytes=True),
144 144 ascii_bytes(k, allow_bytes=True),
145 145 ascii_bytes(v, allow_bytes=True))
146 146
147 147
148 148 def create_hg_wsgi_app(repo_path, repo_name, config):
149 149 """
150 150 Prepares a WSGI application to handle Mercurial requests.
151 151
152 152 :param config: is a list of 3-item tuples representing a ConfigObject
153 153 (it is the serialized version of the config object).
154 154 """
155 155 log.debug("Creating Mercurial WSGI application")
156 156
157 157 baseui = make_hg_ui_from_config(config)
158 158 update_hg_ui_from_hgrc(baseui, repo_path)
159 159
160 160 try:
161 161 return HgWeb(safe_bytes(repo_path), name=safe_bytes(repo_name), baseui=baseui)
162 162 except mercurial.error.RequirementError as e:
163 163 raise exceptions.RequirementException(e)(e)
164 164
165 165
166 166 class GitHandler(object):
167 167 """
168 168 Handler for Git operations like push/pull etc
169 169 """
170 170 def __init__(self, repo_location, repo_name, git_path, update_server_info,
171 171 extras):
172 172 if not os.path.isdir(repo_location):
173 173 raise OSError(repo_location)
174 174 self.content_path = repo_location
175 175 self.repo_name = repo_name
176 176 self.repo_location = repo_location
177 177 self.extras = extras
178 178 self.git_path = git_path
179 179 self.update_server_info = update_server_info
180 180
181 181 def __call__(self, environ, start_response):
182 182 app = webob.exc.HTTPNotFound()
183 183 candidate_paths = (
184 184 self.content_path, os.path.join(self.content_path, '.git'))
185 185
186 186 for content_path in candidate_paths:
187 187 try:
188 188 app = pygrack.GitRepository(
189 189 self.repo_name, content_path, self.git_path,
190 190 self.update_server_info, self.extras)
191 191 break
192 192 except OSError:
193 193 continue
194 194
195 195 return app(environ, start_response)
196 196
197 197
198 198 def create_git_wsgi_app(repo_path, repo_name, config):
199 199 """
200 200 Creates a WSGI application to handle Git requests.
201 201
202 202 :param config: is a dictionary holding the extras.
203 203 """
204 204 git_path = settings.GIT_EXECUTABLE
205 205 update_server_info = config.pop('git_update_server_info')
206 206 app = GitHandler(
207 207 repo_path, repo_name, git_path, update_server_info, config)
208 208
209 209 return app
210 210
211 211
212 212 class GitLFSHandler(object):
213 213 """
214 214 Handler for Git LFS operations
215 215 """
216 216
217 217 def __init__(self, repo_location, repo_name, git_path, update_server_info,
218 218 extras):
219 219 if not os.path.isdir(repo_location):
220 220 raise OSError(repo_location)
221 221 self.content_path = repo_location
222 222 self.repo_name = repo_name
223 223 self.repo_location = repo_location
224 224 self.extras = extras
225 225 self.git_path = git_path
226 226 self.update_server_info = update_server_info
227 227
228 228 def get_app(self, git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme):
229 229 app = git_lfs.create_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme)
230 230 return app
231 231
232 232
233 233 def create_git_lfs_wsgi_app(repo_path, repo_name, config):
234 234 git_path = settings.GIT_EXECUTABLE
235 235 update_server_info = config.pop(b'git_update_server_info')
236 236 git_lfs_enabled = config.pop(b'git_lfs_enabled')
237 237 git_lfs_store_path = config.pop(b'git_lfs_store_path')
238 238 git_lfs_http_scheme = config.pop(b'git_lfs_http_scheme', 'http')
239 239 app = GitLFSHandler(
240 240 repo_path, repo_name, git_path, update_server_info, config)
241 241
242 242 return app.get_app(git_lfs_enabled, git_lfs_store_path, git_lfs_http_scheme)
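
Tying it together, a minimal sketch of the extras dict `create_git_lfs_wsgi_app` consumes; note the byte-string keys it pops, and that every path here is hypothetical:

config = {
    b'git_update_server_info': False,
    b'git_lfs_enabled': True,
    b'git_lfs_store_path': '/var/opt/lfs_store',
    b'git_lfs_http_scheme': 'https',
}
app = create_git_lfs_wsgi_app('/srv/git/myrepo', 'myrepo', config)
# `app` is a WSGI callable ready to be mounted by the server
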
@@ -1,133 +1,133 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 import typing
19 19 import base64
20 20 import logging
21 21
22 22
23 23 log = logging.getLogger(__name__)
24 24
25 25
26 26 def safe_int(val, default=None) -> int:
27 27 """
28 28 Returns int() of val; if val is not convertible to int, returns default
29 29 instead
30 30
31 31 :param val:
32 32 :param default:
33 33 """
34 34
35 35 try:
36 36 val = int(val)
37 37 except (ValueError, TypeError):
38 38 val = default
39 39
40 40 return val
41 41
42 42
43 43 def base64_to_str(text) -> str:
44 44 return safe_str(base64.encodebytes(safe_bytes(text))).strip()
45 45
46 46
47 def get_default_encodings() -> typing.List[str]:
47 def get_default_encodings() -> list[str]:
48 48 return ['utf8']
49 49
50 50
51 51 def safe_str(str_, to_encoding=None) -> str:
52 52 """
53 53 safe str function. Does a few tricks to turn the input into a str
54 54
55 55 :param str_: value to convert; bytes are decoded, anything else is cast
56 56 :param to_encoding: decode bytes using this encoding, UTF-8 by default
57 57 """
58 58 if isinstance(str_, str):
59 59 return str_
60 60
61 61 # if it's bytes cast to str
62 62 if not isinstance(str_, bytes):
63 63 return str(str_)
64 64
65 65 to_encoding = to_encoding or get_default_encodings()
66 66 if not isinstance(to_encoding, (list, tuple)):
67 67 to_encoding = [to_encoding]
68 68
69 69 for enc in to_encoding:
70 70 try:
71 71 return str(str_, enc)
72 72 except UnicodeDecodeError:
73 73 pass
74 74
75 75 return str(str_, to_encoding[0], 'replace')
76 76
77 77
78 78 def safe_bytes(str_, from_encoding=None) -> bytes:
79 79 """
80 80 safe bytes function. Does a few tricks to turn str_ into a bytes string:
81 81
82 82 :param str_: string to encode
83 83 :param from_encoding: encode using this encoding, UTF-8 by default
84 84 """
85 85 if isinstance(str_, bytes):
86 86 return str_
87 87
88 88 if not isinstance(str_, str):
89 89 raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')
90 90
91 91 from_encoding = from_encoding or get_default_encodings()
92 92 if not isinstance(from_encoding, (list, tuple)):
93 93 from_encoding = [from_encoding]
94 94
95 95 for enc in from_encoding:
96 96 try:
97 97 return str_.encode(enc)
98 98 except UnicodeDecodeError:
99 99 pass
100 100
101 101 return str_.encode(from_encoding[0], 'replace')
102 102
103 103
104 104 def ascii_bytes(str_, allow_bytes=False) -> bytes:
105 105 """
106 106 Simple conversion from str to bytes, with assumption that str_ is pure ASCII.
107 107 Fails with UnicodeError on invalid input.
108 108 This should be used where encoding and "safe" ambiguity should be avoided.
109 109 Where strings already have been encoded in other ways but still are unicode
110 110 strings - for example to hex, base64, json, urlencoding, or are known to be
111 111 identifiers.
112 112 """
113 113 if allow_bytes and isinstance(str_, bytes):
114 114 return str_
115 115
116 116 if not isinstance(str_, str):
117 117 raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
118 118 return str_.encode('ascii')
119 119
120 120
121 121 def ascii_str(str_) -> str:
122 122 """
123 123 Simple conversion from bytes to str, with assumption that str_ is pure ASCII.
124 124 Fails with UnicodeError on invalid input.
125 125 This should be used where encoding and "safe" ambiguity should be avoided.
126 126 Where strings are encoded but also otherwise known to be ASCII, and
127 127 where a unicode string is wanted without caring about encoding - for
128 128 example hex, base64, urlencoding, or known identifiers.
129 129 """
130 130
131 131 if not isinstance(str_, bytes):
132 132 raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
133 133 return str_.decode('ascii')
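
A few concrete, runnable cases that pin down the conversion rules above:

assert safe_int('10') == 10
assert safe_int('abc', default=0) == 0
assert safe_str(b'data') == 'data'        # bytes are decoded (utf8 first)
assert safe_str(12) == '12'               # non-bytes values are str()-cast
assert safe_bytes('data') == b'data'
assert ascii_bytes('deadbeef') == b'deadbeef'
assert ascii_str(b'deadbeef') == 'deadbeef'
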
@@ -1,210 +1,209 b''
1 # -*- coding: utf-8 -*-
2 1 #
3 2 # Copyright (C) 2004-2009 Edgewall Software
4 3 # Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de>
5 4 # All rights reserved.
6 5 #
7 6 # This software is licensed as described in the file COPYING, which
8 7 # you should have received as part of this distribution. The terms
9 8 # are also available at http://trac.edgewall.org/wiki/TracLicense.
10 9 #
11 10 # This software consists of voluntary contributions made by many
12 11 # individuals. For the exact contribution history, see the revision
13 12 # history and logs, available at http://trac.edgewall.org/log/.
14 13 #
15 14 # Author: Christopher Lenz <cmlenz@gmx.de>
16 15
17 16 import difflib
18 17
19 18
20 19 def get_filtered_hunks(fromlines, tolines, context=None,
21 20 ignore_blank_lines=False, ignore_case=False,
22 21 ignore_space_changes=False):
23 22 """Retrieve differences in the form of `difflib.SequenceMatcher`
24 23 opcodes, grouped according to the ``context`` and ``ignore_*``
25 24 parameters.
26 25
27 26 :param fromlines: list of lines corresponding to the old content
28 27 :param tolines: list of lines corresponding to the new content
29 28 :param ignore_blank_lines: differences about empty lines only are ignored
30 29 :param ignore_case: upper case / lower case only differences are ignored
31 30 :param ignore_space_changes: differences in amount of spaces are ignored
32 31 :param context: the number of "equal" lines kept for representing
33 32 the context of the change
34 33 :return: generator of grouped `difflib.SequenceMatcher` opcodes
35 34
36 35 If none of the ``ignore_*`` parameters is `True`, there's nothing
37 36 to filter out and the results will come straight from the
38 37 SequenceMatcher.
39 38 """
40 39 hunks = get_hunks(fromlines, tolines, context)
41 40 if ignore_space_changes or ignore_case or ignore_blank_lines:
42 41 hunks = filter_ignorable_lines(hunks, fromlines, tolines, context,
43 42 ignore_blank_lines, ignore_case,
44 43 ignore_space_changes)
45 44 return hunks
46 45
47 46
48 47 def get_hunks(fromlines, tolines, context=None):
49 48 """Generator yielding grouped opcodes describing differences .
50 49
51 50 See `get_filtered_hunks` for the parameter descriptions.
52 51 """
53 52 matcher = difflib.SequenceMatcher(None, fromlines, tolines)
54 53 if context is None:
55 54 return (hunk for hunk in [matcher.get_opcodes()])
56 55 else:
57 56 return matcher.get_grouped_opcodes(context)
58 57
59 58
60 59 def filter_ignorable_lines(hunks, fromlines, tolines, context,
61 60 ignore_blank_lines, ignore_case,
62 61 ignore_space_changes):
63 62 """Detect line changes that should be ignored and emits them as
64 63 tagged as "equal", possibly joined with the preceding and/or
65 64 following "equal" block.
66 65
67 66 See `get_filtered_hunks` for the parameter descriptions.
68 67 """
69 68 def is_ignorable(tag, fromlines, tolines):
70 69 if tag == 'delete' and ignore_blank_lines:
71 70 if ''.join(fromlines) == '':
72 71 return True
73 72 elif tag == 'insert' and ignore_blank_lines:
74 73 if ''.join(tolines) == '':
75 74 return True
76 75 elif tag == 'replace' and (ignore_case or ignore_space_changes):
77 76 if len(fromlines) != len(tolines):
78 77 return False
79 78
80 79 def f(input_str):
81 80 if ignore_case:
82 81 input_str = input_str.lower()
83 82 if ignore_space_changes:
84 83 input_str = ' '.join(input_str.split())
85 84 return input_str
86 85
87 86 for i in range(len(fromlines)):
88 87 if f(fromlines[i]) != f(tolines[i]):
89 88 return False
90 89 return True
91 90
92 91 hunks = list(hunks)
93 92 opcodes = []
94 93 ignored_lines = False
95 94 prev = None
96 95 for hunk in hunks:
97 96 for tag, i1, i2, j1, j2 in hunk:
98 97 if tag == 'equal':
99 98 if prev:
100 99 prev = (tag, prev[1], i2, prev[3], j2)
101 100 else:
102 101 prev = (tag, i1, i2, j1, j2)
103 102 else:
104 103 if is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]):
105 104 ignored_lines = True
106 105 if prev:
107 106 prev = 'equal', prev[1], i2, prev[3], j2
108 107 else:
109 108 prev = 'equal', i1, i2, j1, j2
110 109 continue
111 110 if prev:
112 111 opcodes.append(prev)
113 112 opcodes.append((tag, i1, i2, j1, j2))
114 113 prev = None
115 114 if prev:
116 115 opcodes.append(prev)
117 116
118 117 if ignored_lines:
119 118 if context is None:
120 119 yield opcodes
121 120 else:
122 121 # we leave at most n lines with the tag 'equal' before and after
123 122 # every change
124 123 n = context
125 124 nn = n + n
126 125
127 126 group = []
128 127 def all_equal():
129 128 return all(op[0] == 'equal' for op in group)
130 129 for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
131 130 if idx == 0 and tag == 'equal': # Fixup leading unchanged block
132 131 i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
133 132 elif tag == 'equal' and i2 - i1 > nn:
134 133 group.append((tag, i1, min(i2, i1 + n), j1,
135 134 min(j2, j1 + n)))
136 135 if not all_equal():
137 136 yield group
138 137 group = []
139 138 i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
140 139 group.append((tag, i1, i2, j1, j2))
141 140
142 141 if group and not (len(group) == 1 and group[0][0] == 'equal'):
143 142 if group[-1][0] == 'equal': # Fixup trailing unchanged block
144 143 tag, i1, i2, j1, j2 = group[-1]
145 144 group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
146 145 if not all_equal():
147 146 yield group
148 147 else:
149 148 for hunk in hunks:
150 149 yield hunk
151 150
152 151
153 152 NO_NEWLINE_AT_END = '\\ No newline at end of file'
154 153
155 154
156 155 def unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0,
157 156 ignore_case=0, ignore_space_changes=0, lineterm='\n'):
158 157 """
159 158 Generator producing lines corresponding to a textual diff.
160 159
161 160 See `get_filtered_hunks` for the parameter descriptions.
162 161 """
163 162 # TODO: johbo: Check if this can be nicely integrated into the matching
164 163
165 164 if ignore_space_changes:
166 165 fromlines = [l.strip() for l in fromlines]
167 166 tolines = [l.strip() for l in tolines]
168 167
169 168 for group in get_filtered_hunks(fromlines, tolines, context,
170 169 ignore_blank_lines, ignore_case,
171 170 ignore_space_changes):
172 171 i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
173 172 if i1 == 0 and i2 == 0:
174 173 i1, i2 = -1, -1 # support for Add changes
175 174 if j1 == 0 and j2 == 0:
176 175 j1, j2 = -1, -1 # support for Delete changes
177 yield '@@ -%s +%s @@%s' % (
176 yield '@@ -{} +{} @@{}'.format(
178 177 _hunk_range(i1 + 1, i2 - i1),
179 178 _hunk_range(j1 + 1, j2 - j1),
180 179 lineterm)
181 180 for tag, i1, i2, j1, j2 in group:
182 181 if tag == 'equal':
183 182 for line in fromlines[i1:i2]:
184 183 if not line.endswith(lineterm):
185 184 yield ' ' + line + lineterm
186 185 yield NO_NEWLINE_AT_END + lineterm
187 186 else:
188 187 yield ' ' + line
189 188 else:
190 189 if tag in ('replace', 'delete'):
191 190 for line in fromlines[i1:i2]:
192 191 if not line.endswith(lineterm):
193 192 yield '-' + line + lineterm
194 193 yield NO_NEWLINE_AT_END + lineterm
195 194 else:
196 195 yield '-' + line
197 196 if tag in ('replace', 'insert'):
198 197 for line in tolines[j1:j2]:
199 198 if not line.endswith(lineterm):
200 199 yield '+' + line + lineterm
201 200 yield NO_NEWLINE_AT_END + lineterm
202 201 else:
203 202 yield '+' + line
204 203
205 204
206 205 def _hunk_range(start, length):
207 206 if length != 1:
208 207 return '%d,%d' % (start, length)
209 208 else:
210 209 return '%d' % (start, )
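
A short, self-contained usage sketch of the generator above:

src = ['one\n', 'two\n', 'three\n']
tgt = ['one\n', 'two!\n', 'three\n']
for line in unified_diff(src, tgt, context=1):
    print(line, end='')
# prints a classic hunk: @@ -1,3 +1,3 @@ followed by -two / +two!
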
@@ -1,54 +1,54 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17 import logging
18 18 import hashlib
19 19
20 20 log = logging.getLogger(__name__)
21 21
22 22
23 23 class AttributeDictBase(dict):
24 24 def __getstate__(self):
25 25 odict = self.__dict__ # get attribute dictionary
26 26 return odict
27 27
28 28 def __setstate__(self, dict):
29 29 self.__dict__ = dict
30 30
31 31 __setattr__ = dict.__setitem__
32 32 __delattr__ = dict.__delitem__
33 33
34 34
35 35 class StrictAttributeDict(AttributeDictBase):
36 36 """
37 37 Strict Version of Attribute dict which raises an Attribute error when
38 38 requested attribute is not set
39 39 """
40 40 def __getattr__(self, attr):
41 41 try:
42 42 return self[attr]
43 43 except KeyError:
44 raise AttributeError('%s object has no attribute %s' % (
44 raise AttributeError('{} object has no attribute {}'.format(
45 45 self.__class__, attr))
46 46
47 47
48 48 class AttributeDict(AttributeDictBase):
49 49 def __getattr__(self, attr):
50 50 return self.get(attr, None)
51 51
52 52
53 53 def sha1(val):
54 54 return hashlib.sha1(val).hexdigest()
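
A quick illustration of the two lookup styles:

d = AttributeDict(name='repo1')
assert d.name == 'repo1'
assert d.missing is None                 # permissive lookup returns None

s = StrictAttributeDict(name='repo1')
try:
    s.missing
except AttributeError:
    pass                                 # strict lookup raises
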
@@ -1,47 +1,47 b''
1 1 # RhodeCode VCSServer provides access to different vcs backends via network.
2 2 # Copyright (C) 2014-2020 RhodeCode GmbH
3 3 #
4 4 # This program is free software; you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation; either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software Foundation,
16 16 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 18 from vcsserver.lib import rc_cache
19 19
20 20
21 21 class RemoteBase(object):
22 22 EMPTY_COMMIT = '0' * 40
23 23
24 24 def _region(self, wire):
25 25 cache_repo_id = wire.get('cache_repo_id', '')
26 26 cache_namespace_uid = f'cache_repo.{cache_repo_id}'
27 27 return rc_cache.get_or_create_region('repo_object', cache_namespace_uid)
28 28
29 29 def _cache_on(self, wire):
30 30 context = wire.get('context', '')
31 context_uid = '{}'.format(context)
31 context_uid = f'{context}'
32 32 repo_id = wire.get('repo_id', '')
33 33 cache = wire.get('cache', True)
34 34 cache_on = context and cache
35 35 return cache_on, context_uid, repo_id
36 36
37 37 def vcsserver_invalidate_cache(self, wire, delete):
38 38 from vcsserver.lib import rc_cache
39 39 repo_id = wire.get('repo_id', '')
40 40 cache_repo_id = wire.get('cache_repo_id', '')
41 41 cache_namespace_uid = f'cache_repo.{cache_repo_id}'
42 42
43 43 if delete:
44 44 rc_cache.clear_cache_namespace(
45 45 'repo_object', cache_namespace_uid, invalidate=True)
46 46
47 47 return {'invalidated': {'repo_id': repo_id, 'delete': delete}}
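
For context, a hedged sketch of how remote methods typically combine `_region` and `_cache_on`; `conditional_cache_on_arguments` is assumed to be the decorator the rc_cache regions expose in this codebase, so treat the exact name as an assumption if reusing this elsewhere:

def node_exists(self, wire, path):
    cache_on, context_uid, repo_id = self._cache_on(wire)
    region = self._region(wire)

    @region.conditional_cache_on_arguments(condition=cache_on)
    def _node_exists(_context_uid, _repo_id, _path):
        ...  # the actual backend lookup goes here

    return _node_exists(context_uid, repo_id, path)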