##// END OF EJS Templates
shadow: Fix an error in the regex to detect shadow repository URLs.
Martin Bornhold -
r911:0a696e69 default
parent child Browse files
Show More
@@ -1,522 +1,524 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2014-2016 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 SimpleVCS middleware for handling protocol request (push/clone etc.)
23 23 It's implemented with basic auth function
24 24 """
25 25
26 26 import os
27 27 import logging
28 28 import importlib
29 29 import re
30 30 from functools import wraps
31 31
32 32 from paste.httpheaders import REMOTE_USER, AUTH_TYPE
33 33 from webob.exc import (
34 34 HTTPNotFound, HTTPForbidden, HTTPNotAcceptable, HTTPInternalServerError)
35 35
36 36 import rhodecode
37 37 from rhodecode.authentication.base import authenticate, VCS_TYPE
38 38 from rhodecode.lib.auth import AuthUser, HasPermissionAnyMiddleware
39 39 from rhodecode.lib.base import BasicAuth, get_ip_addr, vcs_operation_context
40 40 from rhodecode.lib.exceptions import (
41 41 HTTPLockedRC, HTTPRequirementError, UserCreationError,
42 42 NotAllowedToCreateUserError)
43 43 from rhodecode.lib.hooks_daemon import prepare_callback_daemon
44 44 from rhodecode.lib.middleware import appenlight
45 45 from rhodecode.lib.middleware.utils import scm_app
46 46 from rhodecode.lib.utils import (
47 47 is_valid_repo, get_rhodecode_realm, get_rhodecode_base_path, SLUG_RE)
48 48 from rhodecode.lib.utils2 import safe_str, fix_PATH, str2bool, safe_unicode
49 49 from rhodecode.lib.vcs.conf import settings as vcs_settings
50 50 from rhodecode.lib.vcs.backends import base
51 51 from rhodecode.model import meta
52 52 from rhodecode.model.db import User, Repository, PullRequest
53 53 from rhodecode.model.scm import ScmModel
54 54 from rhodecode.model.pull_request import PullRequestModel
55 55
56 56
57 57 log = logging.getLogger(__name__)
58 58
59 59
def initialize_generator(factory):
    """
    Initializes the returned generator by draining its first element.

    This can be used to give a generator an initializer, which is the code
    up to the first yield statement. This decorator enforces that the first
    produced element has the value ``"__init__"`` to make its special
    purpose very explicit in the using code.

    :param factory: callable returning a generator (typically a generator
        function).
    :return: wrapper that calls ``factory``, consumes the first element and
        returns the already started generator.
    :raises ValueError: (from the wrapper) if the generator yields nothing
        or if its first element is not ``"__init__"``.
    """

    @wraps(factory)
    def wrapper(*args, **kwargs):
        gen = factory(*args, **kwargs)
        try:
            # Use the builtin instead of the Python 2 only ``gen.next()``
            # method; the builtin is available on Python 2.6+ and Python 3.
            init = next(gen)
        except StopIteration:
            raise ValueError('Generator must yield at least one element.')
        if init != "__init__":
            raise ValueError('First yielded element must be "__init__".')
        return gen
    return wrapper
81 81
82 82
class SimpleVCS(object):
    """Common functionality for SCM HTTP handlers."""

    # Identifier of the SCM handled by a concrete subclass.
    SCM = 'unknown'

    # Repository names; populated per request by ``set_repo_names``.
    acl_repo_name = None
    url_repo_name = None
    vcs_repo_name = None
    # Default so the attribute can be read before ``set_repo_names`` ran.
    is_shadow_repo = False

    # We have to handle requests to shadow repositories different than requests
    # to normal repositories. Therefore we have to distinguish them. To do this
    # we use this regex which will match only on URLs pointing to shadow
    # repositories.
    #
    # The repo groups part is optional, so that pull requests of repositories
    # which do not live inside a repo group are matched as well. Raw strings
    # are used so that ``\d`` is not treated as a string escape sequence.
    shadow_repo_re = re.compile(
        r'(?P<groups>(?:{slug_pat})(?:/{slug_pat})*/)?'  # repo groups
        r'(?P<target>{slug_pat})/'                       # target repo
        r'pull-request/(?P<pr_id>\d+)/'                  # pull request
        r'repository$'                                   # shadow repo
        .format(slug_pat=SLUG_RE.pattern))
102 102
def __init__(self, application, config, registry):
    """
    Store the collaborators and prepare the authentication helper.

    :param application: stored on the instance as ``application``.
    :param config: settings object queried with ``.get()``; the keys read
        here are ``auth_ret_code_detection`` and ``auth_ret_code``.
    :param registry: stored on the instance and handed to ``BasicAuth``.
    """
    self.registry = registry
    self.application = application
    self.config = config

    # re-populated by specialized middleware
    self.repo_vcs_config = base.Config()

    # base path of repo locations
    self.basepath = get_rhodecode_base_path()

    # authenticate this VCS request using authfunc
    detect_ret_code = str2bool(
        self.config.get('auth_ret_code_detection', False))
    ret_code = config.get('auth_ret_code')
    self.authenticate = BasicAuth(
        '', authenticate, registry, ret_code, detect_ret_code)

    self.ip_addr = '0.0.0.0'
119 119
def set_repo_names(self, environ):
    """
    This will populate the attributes acl_repo_name, url_repo_name,
    vcs_repo_name and is_shadow_repo. In case of requests to normal (non
    shadow) repositories all names are equal. In case of requests to a
    shadow repository the acl-name points to the target repo of the pull
    request and the vcs-name points to the shadow repo file system path.
    The url-name is always the URL used by the vcs client program.

    Example in case of a shadow repo:
        acl_repo_name = RepoGroup/MyRepo
        url_repo_name = RepoGroup/MyRepo/pull-request/3/repository
        vcs_repo_name = /repo/base/path/RepoGroup/.__shadow_MyRepo_pr-3

    :param environ: WSGI environment, passed on to
        ``_get_repository_name``.
    """
    # First we set the repo name from URL for all attributes. This is the
    # default if handling normal (non shadow) repo requests.
    self.url_repo_name = self._get_repository_name(environ)
    self.acl_repo_name = self.vcs_repo_name = self.url_repo_name
    self.is_shadow_repo = False

    # Check if this is a request to a shadow repository.
    match = self.shadow_repo_re.match(self.url_repo_name)
    if match:
        match_dict = match.groupdict()

        # Build acl repo name from regex match. The ``groups`` part is
        # optional in the regex and already carries its trailing slash, so
        # a missing group must become an empty prefix and not "None/".
        acl_repo_name = safe_unicode('{groups}{target}'.format(
            groups=match_dict['groups'] or '',
            target=match_dict['target']))

        # Retrieve pull request instance by ID from regex match.
        pull_request = PullRequest.get(match_dict['pr_id'])

        # Only proceed if we got a pull request and if acl repo name from
        # URL equals the target repo name of the pull request.
        if pull_request and (acl_repo_name ==
                             pull_request.target_repo.repo_name):
            # Get file system path to shadow repository.
            workspace_id = PullRequestModel()._workspace_id(pull_request)
            target_vcs = pull_request.target_repo.scm_instance()
            vcs_repo_name = target_vcs._get_shadow_repository_path(
                workspace_id)

            # Store names for later usage.
            self.vcs_repo_name = vcs_repo_name
            self.acl_repo_name = acl_repo_name
            self.is_shadow_repo = True

    log.debug('Repository names: %s', {
        'acl_repo_name': self.acl_repo_name,
        'url_repo_name': self.url_repo_name,
        'vcs_repo_name': self.vcs_repo_name,
    })
171 173
@property
def scm_app(self):
    """
    Return the module implementing the SCM application.

    When the ``vcs.scm_app_implementation`` setting holds a value other
    than ``pyro4`` it is imported as a module and returned; otherwise the
    module level ``scm_app`` is used.
    """
    impl_name = self.config.get('vcs.scm_app_implementation')
    if not impl_name or impl_name == 'pyro4':
        return scm_app

    log.info(
        "Using custom implementation of scm_app: %s",
        impl_name)
    return importlib.import_module(impl_name)
183 185
def _get_by_id(self, repo_name):
    """
    Support the non changeable ``_<ID>`` style clone urls.

    Names with at least two ``/`` separated segments are looked up through
    ``RepoModel().get_repo_by_id``; when a repository is returned, the
    second segment is replaced by that repository's name.

    :param repo_name: repository name as taken from the clone url
    :return: the (possibly rewritten) name passed through ``safe_str``
    """
    segments = repo_name.split('/')
    if len(segments) < 2:
        return safe_str('/'.join(segments))

    from rhodecode.model.repo import RepoModel
    matched_repo = RepoModel().get_repo_by_id(repo_name)
    if matched_repo:
        segments[1] = matched_repo.repo_name

    return safe_str('/'.join(segments))
198 200
def _invalidate_cache(self, repo_name):
    """
    Mark the cache of this repository for invalidation.

    Delegates to ``ScmModel().mark_for_invalidation``.

    :param repo_name: full repo name, also a cache key
    """
    ScmModel().mark_for_invalidation(repo_name)
206 208
def is_valid_and_existing_repo(self, repo_name, base_path, scm_type):
    """
    Check that a repository is known to the database, has the expected
    SCM type and passes the ``is_valid_repo`` check.

    :param repo_name: name of the repository to look up
    :param base_path: base path handed to ``is_valid_repo``
    :param scm_type: the SCM type the repository is expected to have
    :return: ``False`` if the repository is missing in the database or has
        a different type, otherwise the result of ``is_valid_repo``
    """
    db_repo = Repository.get_by_repo_name(repo_name)
    if not db_repo:
        log.debug('Repository `%s` not found inside the database.',
                  repo_name)
        return False

    if db_repo.repo_type != scm_type:
        # ``scm_type`` is the expected value (it is also what gets passed
        # as ``expect_scm`` below); the database entry is what we got.
        log.warning(
            'Repository `%s` have incorrect scm_type, expected %s got %s',
            repo_name, scm_type, db_repo.repo_type)
        return False

    return is_valid_repo(repo_name, base_path, expect_scm=scm_type)
221 223
def valid_and_active_user(self, user):
    """
    Tell whether ``user`` is an actual user object that is active.

    :param user: user object or None
    :return: ``True`` only if ``user`` is not ``None`` and its ``active``
        attribute is truthy, ``False`` otherwise
    """
    if user is not None and user.active:
        return True
    return False
237 239
def _check_permission(self, action, user, repo_name, ip_addr=None):
    """
    Decide whether ``user`` may perform ``action`` on ``repo_name``.

    The IP address is checked first; afterwards ``push`` requires write or
    admin permission while every other action requires at least read
    permission.

    :param action: push or pull action
    :param user: user instance
    :param repo_name: repository name
    :param ip_addr: address handed to the IP restriction check
    :return: ``True`` if access is granted, ``False`` otherwise
    """
    # check IP
    inherit = user.inherit_default_permissions
    if not AuthUser.check_ip_allowed(
            user.user_id, ip_addr, inherit_from_default=inherit):
        return False
    log.info('Access for IP:%s allowed', ip_addr)

    if action == 'push':
        required_perms = ('repository.write', 'repository.admin')
    else:
        # any other action need at least read permission
        required_perms = (
            'repository.read', 'repository.write', 'repository.admin')

    has_permission = HasPermissionAnyMiddleware(*required_perms)
    if not has_permission(user, repo_name):
        return False
    return True
271 273
def _check_ssl(self, environ, start_response):
    """
    Verify the request against the ``push_ssl`` flag of the ``web``
    section in the repository vcs config.

    :param environ: WSGI environment; ``wsgi._org_proto`` is read from it
    :param start_response: not used by this check
    :return: ``False`` if SSL is required but the original protocol is
        ``http``, ``True`` otherwise
    """
    org_proto = environ['wsgi._org_proto']
    # check if we have SSL required ! if not it's a bad request !
    ssl_required = str2bool(self.repo_vcs_config.get('web', 'push_ssl'))
    if not (ssl_required and org_proto == 'http'):
        return True

    log.debug('proto is %s and SSL is required BAD REQUEST !',
              org_proto)
    return False
285 287
def __call__(self, environ, start_response):
    """
    WSGI entry point: delegate to ``_handle_request``.

    Any exception is logged, reported via ``appenlight.track_exception``
    and answered with ``HTTPInternalServerError``. The database session is
    removed in every case.
    """
    try:
        return self._handle_request(environ, start_response)
    except Exception:
        log.exception("Exception while handling request")
        appenlight.track_exception(environ)
        return HTTPInternalServerError()(environ, start_response)
    finally:
        # Runs for the success path and the error path alike.
        meta.Session.remove()
295 297
def _handle_request(self, environ, start_response):
    """
    Run all checks for a VCS request and hand over to the VCS application.

    Steps, in order: SSL check, repository name check, action detection,
    shadow repository restriction (pull only), anonymous permission check
    and - if anonymous access is not sufficient - authentication followed
    by the permission check for the authenticated user. Failing checks are
    answered directly with ``HTTPNotAcceptable``, ``HTTPNotFound`` or
    ``HTTPForbidden``; otherwise the result of ``_generate_vcs_response``
    is returned.

    NOTE(review): block nesting was reconstructed from a listing without
    indentation - confirm against the original file.

    :param environ: WSGI environment
    :param start_response: WSGI ``start_response`` callable
    """

    if not self._check_ssl(environ, start_response):
        reason = ('SSL required, while RhodeCode was unable '
                  'to detect this as SSL request')
        log.debug('User not allowed to proceed, %s', reason)
        return HTTPNotAcceptable(reason)(environ, start_response)

    if not self.url_repo_name:
        log.warning('Repository name is empty: %s', self.url_repo_name)
        # failed to get repo name, we fail now
        return HTTPNotFound()(environ, start_response)
    log.debug('Extracted repo name is %s', self.url_repo_name)

    ip_addr = get_ip_addr(environ)
    username = None

    # skip passing error to error controller
    environ['pylons.status_code_redirect'] = True

    # ======================================================================
    # GET ACTION PULL or PUSH
    # ======================================================================
    action = self._get_action(environ)

    # ======================================================================
    # Check if this is a request to a shadow repository of a pull request.
    # In this case only pull action is allowed.
    # ======================================================================
    if self.is_shadow_repo and action != 'pull':
        reason = 'Only pull action is allowed for shadow repositories.'
        log.debug('User not allowed to proceed, %s', reason)
        return HTTPNotAcceptable(reason)(environ, start_response)

    # ======================================================================
    # CHECK ANONYMOUS PERMISSION
    # ======================================================================
    if action in ['pull', 'push']:
        anonymous_user = User.get_default_user()
        username = anonymous_user.username
        if anonymous_user.active:
            # ONLY check permissions if the user is activated
            anonymous_perm = self._check_permission(
                action, anonymous_user, self.acl_repo_name, ip_addr)
        else:
            anonymous_perm = False

        if not anonymous_user.active or not anonymous_perm:
            if not anonymous_user.active:
                log.debug('Anonymous access is disabled, running '
                          'authentication')

            if not anonymous_perm:
                log.debug('Not enough credentials to access this '
                          'repository as anonymous user')

            # Forget the anonymous username, a real user is needed now.
            username = None
            # ==============================================================
            # DEFAULT PERM FAILED OR ANONYMOUS ACCESS IS DISABLED SO WE
            # NEED TO AUTHENTICATE AND ASK FOR AUTH USER PERMISSIONS
            # ==============================================================

            # try to auth based on environ, container auth methods
            log.debug('Running PRE-AUTH for container based authentication')
            pre_auth = authenticate(
                '', '', environ, VCS_TYPE, registry=self.registry)
            if pre_auth and pre_auth.get('username'):
                username = pre_auth['username']
            log.debug('PRE-AUTH got %s as username', username)

            # If not authenticated by the container, running basic auth
            if not username:
                self.authenticate.realm = get_rhodecode_realm()

                try:
                    result = self.authenticate(environ)
                except (UserCreationError, NotAllowedToCreateUserError) as e:
                    log.error(e)
                    reason = safe_str(e)
                    return HTTPNotAcceptable(reason)(environ, start_response)

                # A string result is treated as the authenticated username;
                # anything else is answered via its ``wsgi_application``.
                if isinstance(result, str):
                    AUTH_TYPE.update(environ, 'basic')
                    REMOTE_USER.update(environ, result)
                    username = result
                else:
                    return result.wsgi_application(environ, start_response)

            # ==============================================================
            # CHECK PERMISSIONS FOR THIS REQUEST USING GIVEN USERNAME
            # ==============================================================
            user = User.get_by_username(username)
            if not self.valid_and_active_user(user):
                return HTTPForbidden()(environ, start_response)
            username = user.username
            user.update_lastactivity()
            meta.Session().commit()

            # check user attributes for password change flag
            user_obj = user
            if user_obj and user_obj.username != User.DEFAULT_USER and \
                    user_obj.user_data.get('force_password_change'):
                reason = 'password change required'
                log.debug('User not allowed to authenticate, %s', reason)
                return HTTPNotAcceptable(reason)(environ, start_response)

            # check permissions for this repository
            perm = self._check_permission(
                action, user, self.acl_repo_name, ip_addr)
            if not perm:
                return HTTPForbidden()(environ, start_response)

    # extras are injected into UI object and later available
    # in hooks executed by rhodecode
    check_locking = _should_check_locking(environ.get('QUERY_STRING'))
    extras = vcs_operation_context(
        environ, repo_name=self.acl_repo_name, username=username,
        action=action, scm=self.SCM, check_locking=check_locking,
        is_shadow_repo=self.is_shadow_repo
    )

    # ======================================================================
    # REQUEST HANDLING
    # ======================================================================
    # Permissions were checked against the acl name; the path on disk is
    # built from the vcs name.
    repo_path = os.path.join(
        safe_str(self.basepath), safe_str(self.vcs_repo_name))
    log.debug('Repository path is %s', repo_path)

    fix_PATH()

    log.info(
        '%s action on %s repo "%s" by "%s" from %s',
        action, self.SCM, safe_str(self.url_repo_name),
        safe_str(username), ip_addr)

    return self._generate_vcs_response(
        environ, start_response, repo_path, extras, action)
433 435
@initialize_generator
def _generate_vcs_response(
        self, environ, start_response, repo_path, extras, action):
    """
    Returns a generator for the response content.

    This method is implemented as a generator, so that it can trigger
    the cache validation after all content sent back to the client. It
    also handles the locking exceptions which will be triggered when
    the first chunk is produced by the underlying WSGI application.

    NOTE(review): block nesting was reconstructed from a listing without
    indentation - confirm against the original file.

    :param environ: WSGI environment
    :param start_response: WSGI ``start_response`` callable
    :param repo_path: path handed to ``_create_wsgi_app``
    :param extras: hook extras; replaced by the value returned from
        ``_prepare_callback_daemon``
    :param action: the cache is invalidated when this equals ``push``
    """
    callback_daemon, extras = self._prepare_callback_daemon(extras)
    config = self._create_config(extras, self.acl_repo_name)
    log.debug('HOOKS extras is %s', extras)
    app = self._create_wsgi_app(repo_path, self.url_repo_name, config)

    try:
        with callback_daemon:
            try:
                response = app(environ, start_response)
            finally:
                # This statement works together with the decorator
                # "initialize_generator" above. The decorator ensures that
                # we hit the first yield statement before the generator is
                # returned back to the WSGI server. This is needed to
                # ensure that the call to "app" above triggers the
                # needed callback to "start_response" before the
                # generator is actually used.
                yield "__init__"

            for chunk in response:
                yield chunk
    except Exception as exc:
        # TODO: johbo: Improve "translating" back the exception.
        # Exceptions tagged via "_vcs_kind" are translated into an HTTP
        # error response, everything else is re-raised unchanged.
        if getattr(exc, '_vcs_kind', None) == 'repo_locked':
            exc = HTTPLockedRC(*exc.args)
            _code = rhodecode.CONFIG.get('lock_ret_code')
            log.debug('Repository LOCKED ret code %s!', (_code,))
        elif getattr(exc, '_vcs_kind', None) == 'requirement':
            log.debug(
                'Repository requires features unknown to this Mercurial')
            exc = HTTPRequirementError(*exc.args)
        else:
            raise

        # "exc" is now one of the HTTP exceptions created above and is
        # called like a WSGI application to produce the error body.
        for chunk in exc(environ, start_response):
            yield chunk
    finally:
        # invalidate cache on push
        try:
            if action == 'push':
                self._invalidate_cache(self.url_repo_name)
        finally:
            meta.Session.remove()
488 490
489 491 def _get_repository_name(self, environ):
490 492 """Get repository name out of the environmnent
491 493
492 494 :param environ: WSGI environment
493 495 """
494 496 raise NotImplementedError()
495 497
496 498 def _get_action(self, environ):
497 499 """Map request commands into a pull or push command.
498 500
499 501 :param environ: WSGI environment
500 502 """
501 503 raise NotImplementedError()
502 504
503 505 def _create_wsgi_app(self, repo_path, repo_name, config):
504 506 """Return the WSGI app that will finally handle the request."""
505 507 raise NotImplementedError()
506 508
507 509 def _create_config(self, extras, repo_name):
508 510 """Create a Pyro safe config representation."""
509 511 raise NotImplementedError()
510 512
def _prepare_callback_daemon(self, extras):
    """
    Call ``prepare_callback_daemon`` with the hooks protocol and the
    direct-calls flag taken from the vcs settings.

    :param extras: passed through unchanged as first argument
    :return: whatever ``prepare_callback_daemon`` returns
    """
    hooks_protocol = vcs_settings.HOOKS_PROTOCOL
    direct_calls = vcs_settings.HOOKS_DIRECT_CALLS
    return prepare_callback_daemon(
        extras, protocol=hooks_protocol, use_direct_calls=direct_calls)
515 517
516 518
517 519 def _should_check_locking(query_string):
518 520 # this is kind of hacky, but due to how mercurial handles client-server
519 521 # server see all operation on commit; bookmarks, phases and
520 522 # obsolescence marker in different transaction, we don't want to check
521 523 # locking on those
522 524 return query_string not in ['cmd=listkeys']
General Comments 0
You need to be logged in to leave comments. Login now