##// END OF EJS Templates
shadow: Simplify the regular expression for detecting shadow repository URLs
Martin Bornhold -
r918:30123d50 default
parent child Browse files
Show More
@@ -1,524 +1,524 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2014-2016 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 """
22 22 SimpleVCS middleware for handling protocol request (push/clone etc.)
23 23 It's implemented with basic auth function
24 24 """
25 25
26 26 import os
27 27 import logging
28 28 import importlib
29 29 import re
30 30 from functools import wraps
31 31
32 32 from paste.httpheaders import REMOTE_USER, AUTH_TYPE
33 33 from webob.exc import (
34 34 HTTPNotFound, HTTPForbidden, HTTPNotAcceptable, HTTPInternalServerError)
35 35
36 36 import rhodecode
37 37 from rhodecode.authentication.base import authenticate, VCS_TYPE
38 38 from rhodecode.lib.auth import AuthUser, HasPermissionAnyMiddleware
39 39 from rhodecode.lib.base import BasicAuth, get_ip_addr, vcs_operation_context
40 40 from rhodecode.lib.exceptions import (
41 41 HTTPLockedRC, HTTPRequirementError, UserCreationError,
42 42 NotAllowedToCreateUserError)
43 43 from rhodecode.lib.hooks_daemon import prepare_callback_daemon
44 44 from rhodecode.lib.middleware import appenlight
45 45 from rhodecode.lib.middleware.utils import scm_app
46 46 from rhodecode.lib.utils import (
47 47 is_valid_repo, get_rhodecode_realm, get_rhodecode_base_path, SLUG_RE)
48 48 from rhodecode.lib.utils2 import safe_str, fix_PATH, str2bool, safe_unicode
49 49 from rhodecode.lib.vcs.conf import settings as vcs_settings
50 50 from rhodecode.lib.vcs.backends import base
51 51 from rhodecode.model import meta
52 52 from rhodecode.model.db import User, Repository, PullRequest
53 53 from rhodecode.model.scm import ScmModel
54 54 from rhodecode.model.pull_request import PullRequestModel
55 55
56 56
57 57 log = logging.getLogger(__name__)
58 58
59 59
def initialize_generator(factory):
    """
    Decorate a generator function so that its initializer runs eagerly.

    The code of ``factory`` up to its first ``yield`` acts as an initializer:
    the wrapper creates the generator, consumes its first element right away
    and returns the already started generator. To make this special purpose
    explicit in the using code, the first produced element must be the string
    ``"__init__"``.

    :param factory: generator function to wrap
    :return: wrapper returning the generator positioned after its first
        ``yield``
    :raises ValueError: raised by the wrapper if the generator yields nothing
        or if its first element is not ``"__init__"``
    """

    @wraps(factory)
    def wrapper(*args, **kwargs):
        gen = factory(*args, **kwargs)
        try:
            # The ``next`` builtin works on Python 2.6+ and Python 3, while
            # the ``gen.next()`` method only exists on Python 2 generators.
            init = next(gen)
        except StopIteration:
            raise ValueError('Generator must yield at least one element.')
        if init != "__init__":
            raise ValueError('First yielded element must be "__init__".')
        return gen
    return wrapper
81 81
82 82
class SimpleVCS(object):
    """Common functionality for SCM HTTP handlers."""

    SCM = 'unknown'

    # Repository names of the current request; ``set_repo_names`` assigns them.
    acl_repo_name = None
    url_repo_name = None
    vcs_repo_name = None
    # Class-level default so the flag can be read even before
    # ``set_repo_names`` has assigned it for a request.
    is_shadow_repo = False

    # We have to handle requests to shadow repositories differently than
    # requests to normal repositories. Therefore we have to distinguish them.
    # To do this we use this regex which will match only on URLs pointing to
    # shadow repositories. Raw strings keep ``\d`` a regex escape instead of a
    # string escape.
    shadow_repo_re = re.compile(
        r'(?P<groups>(?:{slug_pat}/)*)'  # repo groups
        r'(?P<target>{slug_pat})/'       # target repo
        r'pull-request/(?P<pr_id>\d+)/'  # pull request
        r'repository$'                   # shadow repo
        .format(slug_pat=SLUG_RE.pattern))
102 102
def __init__(self, application, config, registry):
    """
    Store the collaborators and set up basic auth for VCS requests.

    :param application: application object handed to the middleware; it is
        only stored here
    :param config: configuration mapping; the keys ``auth_ret_code`` and
        ``auth_ret_code_detection`` are read from it
    :param registry: registry object, stored and passed on to ``BasicAuth``
    """
    self.registry = registry
    self.application = application
    self.config = config
    # re-populated by specialized middleware
    self.repo_vcs_config = base.Config()

    # base path of repo locations
    self.basepath = get_rhodecode_base_path()

    # authenticate this VCS request using authfunc
    detect_ret_code = str2bool(
        self.config.get('auth_ret_code_detection', False))
    ret_code = config.get('auth_ret_code')
    self.authenticate = BasicAuth(
        '', authenticate, registry, ret_code, detect_ret_code)
    self.ip_addr = '0.0.0.0'
119 119
def set_repo_names(self, environ):
    """
    Populate ``acl_repo_name``, ``url_repo_name``, ``vcs_repo_name`` and
    ``is_shadow_repo`` for the current request.

    For requests to normal (non shadow) repositories all three names are
    equal. For requests to a shadow repository the acl-name points to the
    target repo of the pull request and the vcs-name points to the shadow
    repo file system path. The url-name is always the name extracted from the
    URL used by the vcs client program.

    Example in case of a shadow repo:
        acl_repo_name = RepoGroup/MyRepo
        url_repo_name = RepoGroup/MyRepo/pull-request/3/repository
        vcs_repo_name = /repo/base/path/RepoGroup/.__shadow_MyRepo_pr-3

    :param environ: WSGI environment
    """
    # Default: treat the request as one to a normal repository, where every
    # name is the one taken from the URL.
    url_name = self._get_repository_name(environ)
    self.url_repo_name = url_name
    self.acl_repo_name = url_name
    self.vcs_repo_name = url_name
    self.is_shadow_repo = False

    # Does the URL have the shape of a shadow repository URL?
    shadow_match = self.shadow_repo_re.match(self.url_repo_name)
    if shadow_match:
        url_parts = shadow_match.groupdict()

        # The acl name is the target repo including its repo groups.
        acl_repo_name = safe_unicode('{groups}{target}'.format(
            groups=url_parts['groups'] or '',
            target=url_parts['target']))

        # Look up the pull request by the ID found in the URL.
        pull_request = PullRequest.get(url_parts['pr_id'])

        # Only switch to shadow mode if the pull request exists and its
        # target repo is the repo named in the URL.
        targets_url_repo = pull_request and (
            acl_repo_name == pull_request.target_repo.repo_name)
        if targets_url_repo:
            # File system path of the shadow repository.
            workspace_id = PullRequestModel()._workspace_id(pull_request)
            target_vcs = pull_request.target_repo.scm_instance()
            shadow_path = target_vcs._get_shadow_repository_path(
                workspace_id)

            # Store names for later usage.
            self.vcs_repo_name = shadow_path
            self.acl_repo_name = acl_repo_name
            self.is_shadow_repo = True

    resolved_names = {
        'acl_repo_name': self.acl_repo_name,
        'url_repo_name': self.url_repo_name,
        'vcs_repo_name': self.vcs_repo_name,
    }
    log.debug('Repository names: %s', resolved_names)
173 173
@property
def scm_app(self):
    """
    Return the module providing the scm_app implementation.

    The config key ``vcs.scm_app_implementation`` may name a module to import
    instead of the default one. An empty value or ``'pyro4'`` selects the
    default ``scm_app`` module.
    """
    custom_implementation = self.config.get('vcs.scm_app_implementation')
    if not custom_implementation or custom_implementation == 'pyro4':
        return scm_app

    log.info(
        "Using custom implementation of scm_app: %s",
        custom_implementation)
    return importlib.import_module(custom_implementation)
185 185
def _get_by_id(self, repo_name):
    """
    Support the special ``_<ID>`` pattern of non changeable clone URLs.

    If the name has at least two ``/`` separated segments and
    ``RepoModel.get_repo_by_id`` finds a repository for it, the second
    segment is replaced with that repository's name.

    :param repo_name: repository name taken from the clone URL
    :return: the (possibly rewritten) name, passed through ``safe_str``
    """
    segments = repo_name.split('/')
    if len(segments) > 1:
        from rhodecode.model.repo import RepoModel
        matched_repo = RepoModel().get_repo_by_id(repo_name)
        if matched_repo:
            segments[1] = matched_repo.repo_name

    return safe_str('/'.join(segments))
200 200
def _invalidate_cache(self, repo_name):
    """
    Mark the cache of this repository for invalidation on next access.

    :param repo_name: full repo name, also a cache key
    """
    scm_model = ScmModel()
    scm_model.mark_for_invalidation(repo_name)
208 208
def is_valid_and_existing_repo(self, repo_name, base_path, scm_type):
    """
    Check that a repository is known to the database, has the expected SCM
    type and passes ``is_valid_repo`` for the given base path.

    :param repo_name: name of the repository to look up
    :param base_path: base path handed to ``is_valid_repo``
    :param scm_type: SCM type the repository is expected to have
    :return: ``False`` if the repository is missing from the database or its
        type differs from ``scm_type``, otherwise the result of
        ``is_valid_repo``
    """
    db_repo = Repository.get_by_repo_name(repo_name)
    if not db_repo:
        log.debug('Repository `%s` not found inside the database.',
                  repo_name)
        return False

    if db_repo.repo_type != scm_type:
        # ``scm_type`` is what the caller expects and the database entry is
        # what we actually got, so the arguments follow that order.
        log.warning(
            'Repository `%s` have incorrect scm_type, expected %s got %s',
            repo_name, scm_type, db_repo.repo_type)
        return False

    return is_valid_repo(repo_name, base_path, expect_scm=scm_type)
223 223
def valid_and_active_user(self, user):
    """
    Tell whether a user object is present and flagged as active.

    :param user: user object or None
    :return: ``True`` only if ``user`` is not ``None`` and its ``active``
        attribute is truthy, ``False`` otherwise
    """
    return user is not None and bool(user.active)
239 239
def _check_permission(self, action, user, repo_name, ip_addr=None):
    """
    Check whether a user may perform an action on a repository.

    The IP address is checked first. After that ``push`` needs write or
    admin permission on the repository, while every other action needs at
    least read permission.

    :param action: push or pull action
    :param user: user instance
    :param repo_name: repository name
    :param ip_addr: IP address the request comes from
    :return: ``True`` if access is granted, ``False`` otherwise
    """
    # check IP
    inherit = user.inherit_default_permissions
    ip_allowed = AuthUser.check_ip_allowed(
        user.user_id, ip_addr, inherit_from_default=inherit)
    if not ip_allowed:
        return False
    log.info('Access for IP:%s allowed', ip_addr)

    if action == 'push':
        accepted_perms = ('repository.write', 'repository.admin')
    else:
        # any other action need at least read permission
        accepted_perms = (
            'repository.read', 'repository.write', 'repository.admin')

    has_permission = HasPermissionAnyMiddleware(*accepted_perms)
    if not has_permission(user, repo_name):
        return False
    return True
273 273
def _check_ssl(self, environ, start_response):
    """
    Check the ``push_ssl`` flag of the repo config against the request.

    :param environ: WSGI environment; ``wsgi._org_proto`` must be present
    :param start_response: WSGI start_response callable (not used here)
    :return: ``False`` if SSL is required but the original protocol is
        ``http``, ``True`` otherwise
    """
    org_proto = environ['wsgi._org_proto']
    # check if we have SSL required ! if not it's a bad request !
    require_ssl = str2bool(self.repo_vcs_config.get('web', 'push_ssl'))
    if not require_ssl or org_proto != 'http':
        return True

    log.debug('proto is %s and SSL is required BAD REQUEST !',
              org_proto)
    return False
287 287
def __call__(self, environ, start_response):
    """
    WSGI entry point: delegate to ``_handle_request``.

    Any ``Exception`` is logged, reported via appenlight and answered with an
    internal server error response. The database session is removed in every
    case.
    """
    try:
        response = self._handle_request(environ, start_response)
    except Exception:
        log.exception("Exception while handling request")
        appenlight.track_exception(environ)
        response = HTTPInternalServerError()(environ, start_response)
    finally:
        meta.Session.remove()
    return response
297 297
def _handle_request(self, environ, start_response):
    """
    Run all checks for a VCS request and hand it to the VCS application.

    The steps are: SSL check, repository name check, detection of the action,
    shadow repository restriction (pull only), anonymous permission check and,
    if that is not sufficient, authentication plus permission check of the
    authenticated user. Requests passing all checks are answered through
    ``_generate_vcs_response``; every failed check returns the matching HTTP
    error response instead.

    :param environ: WSGI environment
    :param start_response: WSGI start_response callable
    :return: the WSGI response for this request
    """
    if not self._check_ssl(environ, start_response):
        reason = ('SSL required, while RhodeCode was unable '
                  'to detect this as SSL request')
        log.debug('User not allowed to proceed, %s', reason)
        return HTTPNotAcceptable(reason)(environ, start_response)

    if not self.url_repo_name:
        log.warning('Repository name is empty: %s', self.url_repo_name)
        # failed to get repo name, we fail now
        return HTTPNotFound()(environ, start_response)
    log.debug('Extracted repo name is %s', self.url_repo_name)

    ip_addr = get_ip_addr(environ)
    username = None

    # skip passing error to error controller
    environ['pylons.status_code_redirect'] = True

    # ======================================================================
    # GET ACTION PULL or PUSH
    # ======================================================================
    action = self._get_action(environ)

    # ======================================================================
    # Check if this is a request to a shadow repository of a pull request.
    # In this case only pull action is allowed.
    # ======================================================================
    if self.is_shadow_repo and action != 'pull':
        reason = 'Only pull action is allowed for shadow repositories.'
        log.debug('User not allowed to proceed, %s', reason)
        return HTTPNotAcceptable(reason)(environ, start_response)

    # ======================================================================
    # CHECK ANONYMOUS PERMISSION
    # ======================================================================
    if action in ('pull', 'push'):
        anonymous_user = User.get_default_user()
        username = anonymous_user.username

        # ONLY check permissions if the user is activated
        anonymous_perm = False
        if anonymous_user.active:
            anonymous_perm = self._check_permission(
                action, anonymous_user, self.acl_repo_name, ip_addr)

        if not (anonymous_user.active and anonymous_perm):
            if not anonymous_user.active:
                log.debug('Anonymous access is disabled, running '
                          'authentication')

            if not anonymous_perm:
                log.debug('Not enough credentials to access this '
                          'repository as anonymous user')

            username = None
            # ==============================================================
            # DEFAULT PERM FAILED OR ANONYMOUS ACCESS IS DISABLED SO WE
            # NEED TO AUTHENTICATE AND ASK FOR AUTH USER PERMISSIONS
            # ==============================================================

            # try to auth based on environ, container auth methods
            log.debug('Running PRE-AUTH for container based authentication')
            pre_auth = authenticate(
                '', '', environ, VCS_TYPE, registry=self.registry)
            if pre_auth and pre_auth.get('username'):
                username = pre_auth['username']
            log.debug('PRE-AUTH got %s as username', username)

            # If not authenticated by the container, running basic auth
            if not username:
                self.authenticate.realm = get_rhodecode_realm()

                try:
                    result = self.authenticate(environ)
                except (UserCreationError, NotAllowedToCreateUserError) as e:
                    log.error(e)
                    reason = safe_str(e)
                    return HTTPNotAcceptable(reason)(environ, start_response)

                # Anything but a string is a response object to send back.
                if not isinstance(result, str):
                    return result.wsgi_application(environ, start_response)

                AUTH_TYPE.update(environ, 'basic')
                REMOTE_USER.update(environ, result)
                username = result

            # ==============================================================
            # CHECK PERMISSIONS FOR THIS REQUEST USING GIVEN USERNAME
            # ==============================================================
            user = User.get_by_username(username)
            if not self.valid_and_active_user(user):
                return HTTPForbidden()(environ, start_response)
            username = user.username
            user.update_lastactivity()
            meta.Session().commit()

            # check user attributes for password change flag
            if user and user.username != User.DEFAULT_USER and \
                    user.user_data.get('force_password_change'):
                reason = 'password change required'
                log.debug('User not allowed to authenticate, %s', reason)
                return HTTPNotAcceptable(reason)(environ, start_response)

            # check permissions for this repository
            perm = self._check_permission(
                action, user, self.acl_repo_name, ip_addr)
            if not perm:
                return HTTPForbidden()(environ, start_response)

    # extras are injected into UI object and later available
    # in hooks executed by rhodecode
    check_locking = _should_check_locking(environ.get('QUERY_STRING'))
    extras = vcs_operation_context(
        environ, repo_name=self.acl_repo_name, username=username,
        action=action, scm=self.SCM, check_locking=check_locking,
        is_shadow_repo=self.is_shadow_repo
    )

    # ======================================================================
    # REQUEST HANDLING
    # ======================================================================
    repo_path = os.path.join(
        safe_str(self.basepath), safe_str(self.vcs_repo_name))
    log.debug('Repository path is %s', repo_path)

    fix_PATH()

    log.info(
        '%s action on %s repo "%s" by "%s" from %s',
        action, self.SCM, safe_str(self.url_repo_name),
        safe_str(username), ip_addr)

    return self._generate_vcs_response(
        environ, start_response, repo_path, extras, action)
435 435
@initialize_generator
def _generate_vcs_response(
        self, environ, start_response, repo_path, extras, action):
    """
    Return a generator producing the response content.

    Being a generator allows this method to trigger the cache invalidation
    only after all content has been sent back to the client. It also
    translates the locking and requirement exceptions, which are triggered
    when the underlying WSGI application produces its first chunk, into the
    matching HTTP error responses.

    :param environ: WSGI environment
    :param start_response: WSGI start_response callable
    :param repo_path: file system path of the repository to serve
    :param extras: extras handed to the callback daemon and the config
    :param action: action of the request; ``'push'`` triggers the cache
        invalidation
    """
    callback_daemon, extras = self._prepare_callback_daemon(extras)
    config = self._create_config(extras, self.acl_repo_name)
    log.debug('HOOKS extras is %s', extras)
    app = self._create_wsgi_app(repo_path, self.url_repo_name, config)

    try:
        with callback_daemon:
            try:
                response = app(environ, start_response)
            finally:
                # This statement works together with the decorator
                # "initialize_generator" above. The decorator ensures that
                # we hit the first yield statement before the generator is
                # returned back to the WSGI server. This is needed to
                # ensure that the call to "app" above triggers the
                # needed callback to "start_response" before the
                # generator is actually used.
                yield "__init__"

            for chunk in response:
                yield chunk
    except Exception as exc:
        # TODO: johbo: Improve "translating" back the exception.
        vcs_kind = getattr(exc, '_vcs_kind', None)
        if vcs_kind == 'repo_locked':
            error_app = HTTPLockedRC(*exc.args)
            _code = rhodecode.CONFIG.get('lock_ret_code')
            log.debug('Repository LOCKED ret code %s!', (_code,))
        elif vcs_kind == 'requirement':
            log.debug(
                'Repository requires features unknown to this Mercurial')
            error_app = HTTPRequirementError(*exc.args)
        else:
            # Not an exception we know how to translate.
            raise

        for chunk in error_app(environ, start_response):
            yield chunk
    finally:
        # invalidate cache on push
        try:
            if action == 'push':
                self._invalidate_cache(self.url_repo_name)
        finally:
            meta.Session.remove()
490 490
def _get_repository_name(self, environ):
    """
    Extract the repository name from the environment.

    This base implementation always raises; it is meant to be overridden.

    :param environ: WSGI environment
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
497 497
def _get_action(self, environ):
    """
    Map the request command onto a pull or push action.

    This base implementation always raises; it is meant to be overridden.

    :param environ: WSGI environment
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
504 504
def _create_wsgi_app(self, repo_path, repo_name, config):
    """
    Build the WSGI app that will finally handle the request.

    This base implementation always raises; it is meant to be overridden.

    :raises NotImplementedError: always
    """
    raise NotImplementedError()
508 508
def _create_config(self, extras, repo_name):
    """
    Build a Pyro safe config representation.

    This base implementation always raises; it is meant to be overridden.

    :raises NotImplementedError: always
    """
    raise NotImplementedError()
512 512
def _prepare_callback_daemon(self, extras):
    """
    Call ``prepare_callback_daemon`` with the hooks settings of the VCS
    configuration.

    :param extras: extras passed on to ``prepare_callback_daemon``
    :return: whatever ``prepare_callback_daemon`` returns
    """
    hooks_protocol = vcs_settings.HOOKS_PROTOCOL
    direct_calls = vcs_settings.HOOKS_DIRECT_CALLS
    return prepare_callback_daemon(
        extras, protocol=hooks_protocol, use_direct_calls=direct_calls)
517 517
518 518
def _should_check_locking(query_string):
    """
    Tell whether locking has to be checked for the given query string.

    This is kind of hacky, but due to how mercurial handles client-server
    communication, the server sees all operations on commit, bookmarks,
    phases and obsolescence markers in different transactions; we don't want
    to check locking on those.

    :param query_string: query string of the request
    :return: ``False`` for the ``cmd=listkeys`` query, ``True`` otherwise
    """
    queries_without_lock_check = ('cmd=listkeys',)
    return query_string not in queries_without_lock_check
General Comments 0
You need to be logged in to leave comments. Login now