diff --git a/rhodecode/lib/utils.py b/rhodecode/lib/utils.py --- a/rhodecode/lib/utils.py +++ b/rhodecode/lib/utils.py @@ -23,7 +23,6 @@ Utilities library for RhodeCode import datetime import decorator -import json import logging import os import re @@ -34,20 +33,20 @@ import tempfile import traceback import tarfile import warnings -import hashlib from os.path import join as jn import paste import pkg_resources -from webhelpers2.text import collapse, remove_formatting +from webhelpers2.text import collapse, strip_tags, convert_accented_entities, convert_misc_entities + from mako import exceptions -from pyramid.threadlocal import get_current_registry +from rhodecode.lib.hash_utils import sha256_safe, md5, sha1 +from rhodecode.lib.str_utils import safe_bytes, safe_str from rhodecode.lib.vcs.backends.base import Config from rhodecode.lib.vcs.exceptions import VCSError from rhodecode.lib.vcs.utils.helpers import get_scm, get_scm_backend -from rhodecode.lib.utils2 import ( - safe_str, safe_unicode, get_current_rhodecode_user, md5, sha1) +from rhodecode.lib.ext_json import sjson as json from rhodecode.model import meta from rhodecode.model.db import ( Repository, User, RhodeCodeUi, UserLog, RepoGroup, UserGroup) @@ -61,16 +60,16 @@ REMOVED_REPO_PAT = re.compile(r'rm__\d{8 # String which contains characters that are not allowed in slug names for # repositories or repository groups. It is properly escaped to use it in # regular expressions. -SLUG_BAD_CHARS = re.escape('`?=[]\;\'"<>,/~!@#$%^&*()+{}|:') +SLUG_BAD_CHARS = re.escape(r'`?=[]\;\'"<>,/~!@#$%^&*()+{}|:') # Regex that matches forbidden characters in repo/group slugs. -SLUG_BAD_CHAR_RE = re.compile('[{}\x00-\x08\x0b-\x0c\x0e-\x1f]'.format(SLUG_BAD_CHARS)) +SLUG_BAD_CHAR_RE = re.compile(r'[{}\x00-\x08\x0b-\x0c\x0e-\x1f]'.format(SLUG_BAD_CHARS)) # Regex that matches allowed characters in repo/group slugs. -SLUG_GOOD_CHAR_RE = re.compile('[^{}]'.format(SLUG_BAD_CHARS)) +SLUG_GOOD_CHAR_RE = re.compile(r'[^{}]'.format(SLUG_BAD_CHARS)) # Regex that matches whole repo/group slugs. -SLUG_RE = re.compile('[^{}]+'.format(SLUG_BAD_CHARS)) +SLUG_RE = re.compile(r'[^{}]+'.format(SLUG_BAD_CHARS)) _license_cache = None @@ -81,12 +80,17 @@ def repo_name_slug(value): This function is called on each creation/modification of repository to prevent bad names in repo """ + replacement_char = '-' - slug = remove_formatting(value) + slug = strip_tags(value) + slug = convert_accented_entities(slug) + slug = convert_misc_entities(slug) + slug = SLUG_BAD_CHAR_RE.sub('', slug) - slug = re.sub('[\s]+', '-', slug) + slug = re.sub(r'[\s]+', '-', slug) slug = collapse(slug, replacement_char) + return slug @@ -96,10 +100,10 @@ def repo_name_slug(value): def get_repo_slug(request): _repo = '' - if hasattr(request, 'db_repo'): + if hasattr(request, 'db_repo_name'): # if our requests has set db reference use it for name, this # translates the example.com/_ into proper repo names - _repo = request.db_repo.repo_name + _repo = request.db_repo_name elif getattr(request, 'matchdict', None): # pyramid _repo = request.matchdict.get('repo_name') @@ -162,7 +166,7 @@ def get_filesystem_repos(path, recursive log.debug('now scanning in %s location recursive:%s...', path, recursive) def _get_repos(p): - dirpaths = _get_dirpaths(p) + dirpaths = get_dirpaths(p) if not _is_dir_writable(p): log.warning('repo path without write access: %s', p) @@ -194,7 +198,7 @@ def get_filesystem_repos(path, recursive return _get_repos(path) -def _get_dirpaths(p): +def get_dirpaths(p: str) -> list: try: # OS-independable way of checking if we have at least read-only # access or not. @@ -214,7 +218,7 @@ def _get_dirpaths(p): def _has_correct_type(item): if type(item) is not expected_type: log.error( - "Ignoring path %s since it cannot be decoded into unicode.", + "Ignoring path %s since it cannot be decoded into str.", # Using "repr" to make sure that we see the byte value in case # of support. repr(item)) @@ -372,7 +376,7 @@ def config_data_from_db(clear_session=Tr log.debug( 'settings ui from db@repo[%s]: %s', repo, - ','.join(map(lambda s: '[{}] {}={}'.format(*s), ui_data))) + ','.join(['[{}] {}={}'.format(*s) for s in ui_data])) if clear_session: meta.Session.remove() @@ -441,7 +445,7 @@ def set_rhodecode_config(config): from rhodecode.model.settings import SettingsModel app_settings = SettingsModel().get_all_settings() - for k, v in app_settings.items(): + for k, v in list(app_settings.items()): config[k] = v @@ -459,9 +463,9 @@ def get_rhodecode_base_path(): Returns the base path. The base path is the filesystem path which points to the repository store. """ - from rhodecode.model.settings import SettingsModel - paths_ui = SettingsModel().get_ui_by_section_and_key('paths', '/') - return safe_str(paths_ui.ui_value) + + import rhodecode + return rhodecode.CONFIG['default_base_path'] def map_groups(path): @@ -531,10 +535,10 @@ def repo2db_mapper(initial_repo_list, re enable_downloads = defs.get('repo_enable_downloads') private = defs.get('repo_private') - for name, repo in initial_repo_list.items(): + for name, repo in list(initial_repo_list.items()): group = map_groups(name) - unicode_name = safe_unicode(name) - db_repo = repo_model.get_by_repo_name(unicode_name) + str_name = safe_str(name) + db_repo = repo_model.get_by_repo_name(str_name) # found repo that is on filesystem not in RhodeCode database if not db_repo: log.info('repository %s not found, creating now', name) @@ -574,7 +578,7 @@ def repo2db_mapper(initial_repo_list, re if remove_obsolete: # remove from database those repositories that are not in the filesystem for repo in sa.query(Repository).all(): - if repo.repo_name not in initial_repo_list.keys(): + if repo.repo_name not in list(initial_repo_list.keys()): log.debug("Removing non-existing repository found in db `%s`", repo.repo_name) try: @@ -594,13 +598,14 @@ def repo2db_mapper(initial_repo_list, re return gr_name initial_repo_group_list = [splitter(x) for x in - initial_repo_list.keys() if splitter(x)] + list(initial_repo_list.keys()) if splitter(x)] # remove from database those repository groups that are not in the # filesystem due to parent child relationships we need to delete them # in a specific order of most nested first all_groups = [x.group_name for x in sa.query(RepoGroup).all()] - nested_sort = lambda gr: len(gr.split('/')) + def nested_sort(gr): + return len(gr.split('/')) for group_name in sorted(all_groups, key=nested_sort, reverse=True): if group_name not in initial_repo_group_list: repo_group = RepoGroup.get_by_group_name(group_name) @@ -638,7 +643,7 @@ def load_rcextensions(root_path): rcextensions = __import__('rcextensions') except ImportError: if os.path.isdir(os.path.join(path, 'rcextensions')): - log.warn('Unable to load rcextensions from %s', path) + log.warning('Unable to load rcextensions from %s', path) rcextensions = None if rcextensions: @@ -676,8 +681,11 @@ def create_test_index(repo_location, con """ Makes default test index. """ - import rc_testdata - + try: + import rc_testdata + except ImportError: + raise ImportError('Failed to import rc_testdata, ' + 'please make sure this package is installed from requirements_test.txt') rc_testdata.extract_search_index( 'vcs_search_index', os.path.dirname(config['search.location'])) @@ -696,13 +704,16 @@ def create_test_database(test_path, conf Makes a fresh database. """ from rhodecode.lib.db_manage import DbManage + from rhodecode.lib.utils2 import get_encryption_key # PART ONE create db dbconf = config['sqlalchemy.db1.url'] + enc_key = get_encryption_key(config) + log.debug('making test db %s', dbconf) dbmanage = DbManage(log_sql=False, dbconf=dbconf, root=config['here'], - tests=True, cli_args={'force_ask': True}) + tests=True, cli_args={'force_ask': True}, enc_key=enc_key) dbmanage.create_tables(override=True) dbmanage.set_db_version() # for tests dynamically set new root paths based on generated content @@ -752,7 +763,7 @@ def password_changed(auth_user, session) if auth_user.username == User.DEFAULT_USER or auth_user.user_id is None: return False - password_hash = md5(auth_user.password) if auth_user.password else None + password_hash = md5(safe_bytes(auth_user.password)) if auth_user.password else None rhodecode_user = session.get('rhodecode_user', {}) session_password_hash = rhodecode_user.get('password', '') return password_hash != session_password_hash @@ -777,7 +788,7 @@ def generate_platform_uuid(): try: uuid_list = [platform.platform()] - return hashlib.sha256(':'.join(uuid_list)).hexdigest() + return sha256_safe(':'.join(uuid_list)) except Exception as e: log.error('Failed to generate host uuid: %s', e) return 'UNDEFINED' diff --git a/rhodecode/lib/utils2.py b/rhodecode/lib/utils2.py --- a/rhodecode/lib/utils2.py +++ b/rhodecode/lib/utils2.py @@ -39,27 +39,23 @@ import getpass import socket import errno import random -from functools import update_wrapper, partial, wraps +import functools from contextlib import closing import pygments.lexers import sqlalchemy +import sqlalchemy.event import sqlalchemy.engine.url import sqlalchemy.exc import sqlalchemy.sql import webob -import pyramid.threadlocal from pyramid.settings import asbool import rhodecode from rhodecode.translation import _, _pluralize from rhodecode.lib.str_utils import safe_str, safe_int, safe_bytes from rhodecode.lib.hash_utils import md5, md5_safe, sha1, sha1_safe -from rhodecode.lib.type_utils import aslist, str2bool -from functools import reduce - -#TODO: there's no longer safe_unicode, we mock it now, but should remove it -safe_unicode = safe_str +from rhodecode.lib.type_utils import aslist, str2bool, StrictAttributeDict, AttributeDict def __get_lem(extra_mapping=None): @@ -85,7 +81,7 @@ def __get_lem(extra_mapping=None): for lx, t in sorted(pygments.lexers.LEXERS.items()): m = list(map(__clean, t[-2])) if m: - m = reduce(lambda x, y: x + y, m) + m = functools.reduce(lambda x, y: x + y, m) for ext in m: desc = lx.replace('Lexer', '') d[ext].append(desc) @@ -94,7 +90,7 @@ def __get_lem(extra_mapping=None): extra_mapping = extra_mapping or {} if extra_mapping: - for k, v in extra_mapping.items(): + for k, v in list(extra_mapping.items()): if k not in data: # register new mapping2lexer data[k] = [v] @@ -102,7 +98,7 @@ def __get_lem(extra_mapping=None): return data -def convert_line_endings(line, mode): +def convert_line_endings(line: str, mode) -> str: """ Converts a given line "line end" accordingly to given mode @@ -113,7 +109,6 @@ def convert_line_endings(line, mode): :param line: given line to convert :param mode: mode to convert to - :rtype: str :return: converted line according to mode """ if mode == 0: @@ -127,14 +122,13 @@ def convert_line_endings(line, mode): return line -def detect_mode(line, default): +def detect_mode(line: str, default) -> int: """ Detects line break for given line, if line break couldn't be found given default value is returned :param line: str line :param default: default - :rtype: int :return: value of line end on of 0 - Unix, 1 - Mac, 2 - DOS """ if line.endswith('\r\n'): @@ -159,14 +153,18 @@ def remove_prefix(s, prefix): return s -def find_calling_context(ignore_modules=None): +def find_calling_context(ignore_modules=None, depth=4, output_writer=None, indent=True): """ Look through the calling stack and return the frame which called this function and is part of core module ( ie. rhodecode.* ) :param ignore_modules: list of modules to ignore eg. ['rhodecode.lib'] + :param depth: + :param output_writer: + :param indent: usage:: + from rhodecode.lib.utils2 import find_calling_context calling_context = find_calling_context(ignore_modules=[ @@ -174,24 +172,36 @@ def find_calling_context(ignore_modules= 'rhodecode.model.settings', ]) - if calling_context: - cc_str = 'call context %s:%s' % ( - calling_context.f_code.co_filename, - calling_context.f_lineno, - ) - print(cc_str) """ + import inspect + if not output_writer: + try: + from rich import print as pprint + except ImportError: + pprint = print + output_writer = pprint - ignore_modules = ignore_modules or [] + frame = inspect.currentframe() + cc = [] + try: + for i in range(depth): # current frame + 3 callers + frame = frame.f_back + if not frame: + break - f = sys._getframe(2) - while f.f_back is not None: - name = f.f_globals.get('__name__') - if name and name.startswith(__name__.split('.')[0]): + info = inspect.getframeinfo(frame) + name = frame.f_globals.get('__name__') if name not in ignore_modules: - return f - f = f.f_back - return None + cc.insert(0, f'CALL_CONTEXT:{i}: file {info.filename}:{info.lineno} -> {info.function}') + finally: + # Avoids a reference cycle + del frame + + output_writer('* INFO: This code was called from: *') + for cnt, frm_info in enumerate(cc): + if not indent: + cnt = 1 + output_writer(' ' * cnt + frm_info) def ping_connection(connection, branch): @@ -252,15 +262,10 @@ def engine_from_config(configuration, pr parameters, context, executemany): setattr(conn, 'query_start_time', time.time()) log.info(color_sql(">>>>> STARTING QUERY >>>>>")) - calling_context = find_calling_context(ignore_modules=[ + find_calling_context(ignore_modules=[ 'rhodecode.lib.caching_query', 'rhodecode.model.settings', - ]) - if calling_context: - log.info(color_sql('call context %s:%s' % ( - calling_context.f_code.co_filename, - calling_context.f_lineno, - ))) + ], output_writer=log.info) def after_cursor_execute(conn, cursor, statement, parameters, context, executemany): @@ -272,10 +277,12 @@ def engine_from_config(configuration, pr return engine -def get_encryption_key(config): +def get_encryption_key(config) -> bytes: secret = config.get('rhodecode.encrypted_values.secret') default = config['beaker.session.secret'] - return secret or default + enc_key = secret or default + + return safe_bytes(enc_key) def age(prevdate, now=None, show_short_version=False, show_suffix=True, short_format=False): @@ -476,7 +483,7 @@ def get_host_info(request): qualified_home_url = request.route_url('home') parsed_url = urlobject.URLObject(qualified_home_url) - decoded_path = safe_unicode(urllib.parse.unquote(parsed_url.path.rstrip('/'))) + decoded_path = safe_str(urllib.parse.unquote(parsed_url.path.rstrip('/'))) return { 'scheme': parsed_url.scheme, @@ -488,7 +495,7 @@ def get_host_info(request): def get_clone_url(request, uri_tmpl, repo_name, repo_id, repo_type, **override): qualified_home_url = request.route_url('home') parsed_url = urlobject.URLObject(qualified_home_url) - decoded_path = safe_unicode(urllib.parse.unquote(parsed_url.path.rstrip('/'))) + decoded_path = safe_str(urllib.parse.unquote(parsed_url.path.rstrip('/'))) args = { 'scheme': parsed_url.scheme, @@ -505,8 +512,9 @@ def get_clone_url(request, uri_tmpl, rep args.update(override) args['user'] = urllib.parse.quote(safe_str(args['user'])) - for k, v in args.items(): - uri_tmpl = uri_tmpl.replace('{%s}' % k, v) + for k, v in list(args.items()): + tmpl_key = '{%s}' % k + uri_tmpl = uri_tmpl.replace(tmpl_key, v) # special case for SVN clone url if repo_type == 'svn': @@ -516,7 +524,7 @@ def get_clone_url(request, uri_tmpl, rep url_obj = urlobject.URLObject(uri_tmpl) url = url_obj.with_netloc(url_obj.netloc.lstrip('@')) - return safe_unicode(url) + return safe_str(url) def get_commit_safe(repo, commit_id=None, commit_idx=None, pre_load=None, @@ -594,36 +602,6 @@ def extract_mentioned_users(s): return sorted(list(usrs), key=lambda k: k.lower()) -class AttributeDictBase(dict): - def __getstate__(self): - odict = self.__dict__ # get attribute dictionary - return odict - - def __setstate__(self, dict): - self.__dict__ = dict - - __setattr__ = dict.__setitem__ - __delattr__ = dict.__delitem__ - - -class StrictAttributeDict(AttributeDictBase): - """ - Strict Version of Attribute dict which raises an Attribute error when - requested attribute is not set - """ - def __getattr__(self, attr): - try: - return self[attr] - except KeyError: - raise AttributeError('%s object has no attribute %s' % ( - self.__class__, attr)) - - -class AttributeDict(AttributeDictBase): - def __getattr__(self, attr): - return self.get(attr, None) - - def fix_PATH(os_=None): """ Get current active python path, and append it to PATH variable to fix @@ -635,19 +613,18 @@ def fix_PATH(os_=None): os = os_ cur_path = os.path.split(sys.executable)[0] + os_path = os.environ['PATH'] if not os.environ['PATH'].startswith(cur_path): - os.environ['PATH'] = '%s:%s' % (cur_path, os.environ['PATH']) + os.environ['PATH'] = f'{cur_path}:{os_path}' def obfuscate_url_pw(engine): _url = engine or '' try: _url = sqlalchemy.engine.url.make_url(engine) - if _url.password: - _url.password = 'XXXXX' except Exception: pass - return str(_url) + return repr(_url) def get_server_url(environ): @@ -695,6 +672,7 @@ def get_current_rhodecode_user(request=N """ Gets rhodecode user from request """ + import pyramid.threadlocal pyramid_request = request or pyramid.threadlocal.get_current_request() # web case @@ -837,51 +815,15 @@ class Optional(object): def glob2re(pat): - """ - Translate a shell PATTERN to a regular expression. - - There is no way to quote meta-characters. - """ - - i, n = 0, len(pat) - res = '' - while i < n: - c = pat[i] - i = i+1 - if c == '*': - #res = res + '.*' - res = res + '[^/]*' - elif c == '?': - #res = res + '.' - res = res + '[^/]' - elif c == '[': - j = i - if j < n and pat[j] == '!': - j = j+1 - if j < n and pat[j] == ']': - j = j+1 - while j < n and pat[j] != ']': - j = j+1 - if j >= n: - res = res + '\\[' - else: - stuff = pat[i:j].replace('\\','\\\\') - i = j+1 - if stuff[0] == '!': - stuff = '^' + stuff[1:] - elif stuff[0] == '^': - stuff = '\\' + stuff - res = '%s[%s]' % (res, stuff) - else: - res = res + re.escape(c) - return res + '\Z(?ms)' + import fnmatch + return fnmatch.translate(pat) def parse_byte_string(size_str): match = re.match(r'(\d+)(MB|KB)', size_str, re.IGNORECASE) if not match: - raise ValueError('Given size:%s is invalid, please make sure ' - 'to use format of (MB|KB)' % size_str) + raise ValueError(f'Given size:{size_str} is invalid, please make sure ' + f'to use format of (MB|KB)') _parts = match.groups() num, type_ = _parts @@ -911,7 +853,7 @@ class CachedProperty(object): if func_name is None: func_name = func.__name__ self.data = (func, func_name) - update_wrapper(self, func) + functools.update_wrapper(self, func) def __get__(self, inst, class_): if inst is None: @@ -921,7 +863,7 @@ class CachedProperty(object): value = func(inst) inst.__dict__[func_name] = value if '_invalidate_prop_cache' not in inst.__dict__: - inst.__dict__['_invalidate_prop_cache'] = partial( + inst.__dict__['_invalidate_prop_cache'] = functools.partial( self._invalidate_prop_cache, inst) return value @@ -967,7 +909,7 @@ def retry(func=None, exception=Exception """ if func is None: - return partial( + return functools.partial( retry, exception=exception, n_tries=n_tries, @@ -976,7 +918,7 @@ def retry(func=None, exception=Exception logger=logger, ) - @wraps(func) + @functools.wraps(func) def wrapper(*args, **kwargs): _n_tries, n_delay = n_tries, delay log = logging.getLogger('rhodecode.retry') @@ -1016,7 +958,7 @@ def user_agent_normalizer(user_agent_raw parts = ua.split(' ') if parts: ua = parts[0] - ua = re.sub('\.windows\.\d', '', ua).strip() + ua = re.sub(r'\.windows\.\d', '', ua).strip() return ua except Exception: