# HG changeset patch
# User RhodeCode Admin <admin@rhodecode.com>
# Date 2023-07-18 09:42:39
# Node ID e00a2a480d7dff61743caef8462be1c24684cf94
# Parent  d1c4b80b552a7f3bb0a9dc955c96871a569eec86

utils: ported to python3 and new app

diff --git a/rhodecode/lib/utils.py b/rhodecode/lib/utils.py
--- a/rhodecode/lib/utils.py
+++ b/rhodecode/lib/utils.py
@@ -23,7 +23,6 @@ Utilities library for RhodeCode
 
 import datetime
 import decorator
-import json
 import logging
 import os
 import re
@@ -34,20 +33,20 @@ import tempfile
 import traceback
 import tarfile
 import warnings
-import hashlib
 from os.path import join as jn
 
 import paste
 import pkg_resources
-from webhelpers2.text import collapse, remove_formatting
+from webhelpers2.text import collapse, strip_tags, convert_accented_entities, convert_misc_entities
+
 from mako import exceptions
-from pyramid.threadlocal import get_current_registry
 
+from rhodecode.lib.hash_utils import sha256_safe, md5, sha1
+from rhodecode.lib.str_utils import safe_bytes, safe_str
 from rhodecode.lib.vcs.backends.base import Config
 from rhodecode.lib.vcs.exceptions import VCSError
 from rhodecode.lib.vcs.utils.helpers import get_scm, get_scm_backend
-from rhodecode.lib.utils2 import (
-    safe_str, safe_unicode, get_current_rhodecode_user, md5, sha1)
+from rhodecode.lib.ext_json import sjson as json
 from rhodecode.model import meta
 from rhodecode.model.db import (
     Repository, User, RhodeCodeUi, UserLog, RepoGroup, UserGroup)
@@ -61,16 +60,16 @@ REMOVED_REPO_PAT = re.compile(r'rm__\d{8
 # String which contains characters that are not allowed in slug names for
 # repositories or repository groups. It is properly escaped to use it in
 # regular expressions.
-SLUG_BAD_CHARS = re.escape('`?=[]\;\'"<>,/~!@#$%^&*()+{}|:')
+SLUG_BAD_CHARS = re.escape(r'`?=[]\;\'"<>,/~!@#$%^&*()+{}|:')
 
 # Regex that matches forbidden characters in repo/group slugs.
-SLUG_BAD_CHAR_RE = re.compile('[{}\x00-\x08\x0b-\x0c\x0e-\x1f]'.format(SLUG_BAD_CHARS))
+SLUG_BAD_CHAR_RE = re.compile(r'[{}\x00-\x08\x0b-\x0c\x0e-\x1f]'.format(SLUG_BAD_CHARS))
 
 # Regex that matches allowed characters in repo/group slugs.
-SLUG_GOOD_CHAR_RE = re.compile('[^{}]'.format(SLUG_BAD_CHARS))
+SLUG_GOOD_CHAR_RE = re.compile(r'[^{}]'.format(SLUG_BAD_CHARS))
 
 # Regex that matches whole repo/group slugs.
-SLUG_RE = re.compile('[^{}]+'.format(SLUG_BAD_CHARS))
+SLUG_RE = re.compile(r'[^{}]+'.format(SLUG_BAD_CHARS))
 
 _license_cache = None
 
@@ -81,12 +80,17 @@ def repo_name_slug(value):
     This function is called on each creation/modification
     of repository to prevent bad names in repo
     """
+
     replacement_char = '-'
 
-    slug = remove_formatting(value)
+    slug = strip_tags(value)
+    slug = convert_accented_entities(slug)
+    slug = convert_misc_entities(slug)
+
     slug = SLUG_BAD_CHAR_RE.sub('', slug)
-    slug = re.sub('[\s]+', '-', slug)
+    slug = re.sub(r'[\s]+', '-', slug)
     slug = collapse(slug, replacement_char)
+
     return slug
 
 
@@ -96,10 +100,10 @@ def repo_name_slug(value):
 def get_repo_slug(request):
     _repo = ''
 
-    if hasattr(request, 'db_repo'):
+    if hasattr(request, 'db_repo_name'):
         # if our requests has set db reference use it for name, this
         # translates the example.com/_<id> into proper repo names
-        _repo = request.db_repo.repo_name
+        _repo = request.db_repo_name
     elif getattr(request, 'matchdict', None):
         # pyramid
         _repo = request.matchdict.get('repo_name')
@@ -162,7 +166,7 @@ def get_filesystem_repos(path, recursive
     log.debug('now scanning in %s location recursive:%s...', path, recursive)
 
     def _get_repos(p):
-        dirpaths = _get_dirpaths(p)
+        dirpaths = get_dirpaths(p)
         if not _is_dir_writable(p):
             log.warning('repo path without write access: %s', p)
 
@@ -194,7 +198,7 @@ def get_filesystem_repos(path, recursive
     return _get_repos(path)
 
 
-def _get_dirpaths(p):
+def get_dirpaths(p: str) -> list:
     try:
         # OS-independable way of checking if we have at least read-only
         # access or not.
@@ -214,7 +218,7 @@ def _get_dirpaths(p):
     def _has_correct_type(item):
         if type(item) is not expected_type:
             log.error(
-                "Ignoring path %s since it cannot be decoded into unicode.",
+                "Ignoring path %s since it cannot be decoded into str.",
                 # Using "repr" to make sure that we see the byte value in case
                 # of support.
                 repr(item))
@@ -372,7 +376,7 @@ def config_data_from_db(clear_session=Tr
     log.debug(
         'settings ui from db@repo[%s]: %s',
         repo,
-        ','.join(map(lambda s: '[{}] {}={}'.format(*s), ui_data)))
+        ','.join(['[{}] {}={}'.format(*s) for s in ui_data]))
     if clear_session:
         meta.Session.remove()
 
@@ -441,7 +445,7 @@ def set_rhodecode_config(config):
     from rhodecode.model.settings import SettingsModel
     app_settings = SettingsModel().get_all_settings()
 
-    for k, v in app_settings.items():
+    for k, v in list(app_settings.items()):
         config[k] = v
 
 
@@ -459,9 +463,9 @@ def get_rhodecode_base_path():
     Returns the base path. The base path is the filesystem path which points
     to the repository store.
     """
-    from rhodecode.model.settings import SettingsModel
-    paths_ui = SettingsModel().get_ui_by_section_and_key('paths', '/')
-    return safe_str(paths_ui.ui_value)
+
+    import rhodecode
+    return rhodecode.CONFIG['default_base_path']
 
 
 def map_groups(path):
@@ -531,10 +535,10 @@ def repo2db_mapper(initial_repo_list, re
     enable_downloads = defs.get('repo_enable_downloads')
     private = defs.get('repo_private')
 
-    for name, repo in initial_repo_list.items():
+    for name, repo in list(initial_repo_list.items()):
         group = map_groups(name)
-        unicode_name = safe_unicode(name)
-        db_repo = repo_model.get_by_repo_name(unicode_name)
+        str_name = safe_str(name)
+        db_repo = repo_model.get_by_repo_name(str_name)
         # found repo that is on filesystem not in RhodeCode database
         if not db_repo:
             log.info('repository %s not found, creating now', name)
@@ -574,7 +578,7 @@ def repo2db_mapper(initial_repo_list, re
     if remove_obsolete:
         # remove from database those repositories that are not in the filesystem
         for repo in sa.query(Repository).all():
-            if repo.repo_name not in initial_repo_list.keys():
+            if repo.repo_name not in initial_repo_list:
                 log.debug("Removing non-existing repository found in db `%s`",
                           repo.repo_name)
                 try:
@@ -594,13 +598,14 @@ def repo2db_mapper(initial_repo_list, re
             return gr_name
 
         initial_repo_group_list = [splitter(x) for x in
-                                   initial_repo_list.keys() if splitter(x)]
+                                   list(initial_repo_list.keys()) if splitter(x)]
 
         # remove from database those repository groups that are not in the
         # filesystem due to parent child relationships we need to delete them
         # in a specific order of most nested first
         all_groups = [x.group_name for x in sa.query(RepoGroup).all()]
-        nested_sort = lambda gr: len(gr.split('/'))
+        def nested_sort(gr):
+            return len(gr.split('/'))
         for group_name in sorted(all_groups, key=nested_sort, reverse=True):
             if group_name not in initial_repo_group_list:
                 repo_group = RepoGroup.get_by_group_name(group_name)
@@ -638,7 +643,7 @@ def load_rcextensions(root_path):
         rcextensions = __import__('rcextensions')
     except ImportError:
         if os.path.isdir(os.path.join(path, 'rcextensions')):
-            log.warn('Unable to load rcextensions from %s', path)
+            log.warning('Unable to load rcextensions from %s', path)
         rcextensions = None
 
     if rcextensions:
@@ -676,8 +681,11 @@ def create_test_index(repo_location, con
     """
     Makes default test index.
     """
-    import rc_testdata
-
+    try:
+        import rc_testdata
+    except ImportError:
+        raise ImportError('Failed to import rc_testdata, '
+                          'please make sure this package is installed from requirements_test.txt')
     rc_testdata.extract_search_index(
         'vcs_search_index', os.path.dirname(config['search.location']))
 
@@ -696,13 +704,16 @@ def create_test_database(test_path, conf
     Makes a fresh database.
     """
     from rhodecode.lib.db_manage import DbManage
+    from rhodecode.lib.utils2 import get_encryption_key
 
     # PART ONE create db
     dbconf = config['sqlalchemy.db1.url']
+    enc_key = get_encryption_key(config)
+
     log.debug('making test db %s', dbconf)
 
     dbmanage = DbManage(log_sql=False, dbconf=dbconf, root=config['here'],
-                        tests=True, cli_args={'force_ask': True})
+                        tests=True, cli_args={'force_ask': True}, enc_key=enc_key)
     dbmanage.create_tables(override=True)
     dbmanage.set_db_version()
     # for tests dynamically set new root paths based on generated content
@@ -752,7 +763,7 @@ def password_changed(auth_user, session)
     if auth_user.username == User.DEFAULT_USER or auth_user.user_id is None:
         return False
 
-    password_hash = md5(auth_user.password) if auth_user.password else None
+    password_hash = md5(safe_bytes(auth_user.password)) if auth_user.password else None
     rhodecode_user = session.get('rhodecode_user', {})
     session_password_hash = rhodecode_user.get('password', '')
     return password_hash != session_password_hash
@@ -777,7 +788,7 @@ def generate_platform_uuid():
 
     try:
         uuid_list = [platform.platform()]
-        return hashlib.sha256(':'.join(uuid_list)).hexdigest()
+        return sha256_safe(':'.join(uuid_list))
     except Exception as e:
         log.error('Failed to generate host uuid: %s', e)
         return 'UNDEFINED'
diff --git a/rhodecode/lib/utils2.py b/rhodecode/lib/utils2.py
--- a/rhodecode/lib/utils2.py
+++ b/rhodecode/lib/utils2.py
@@ -39,27 +39,23 @@ import getpass
 import socket
 import errno
 import random
-from functools import update_wrapper, partial, wraps
+import functools
 from contextlib import closing
 
 import pygments.lexers
 import sqlalchemy
+import sqlalchemy.event
 import sqlalchemy.engine.url
 import sqlalchemy.exc
 import sqlalchemy.sql
 import webob
-import pyramid.threadlocal
 from pyramid.settings import asbool
 
 import rhodecode
 from rhodecode.translation import _, _pluralize
 from rhodecode.lib.str_utils import safe_str, safe_int, safe_bytes
 from rhodecode.lib.hash_utils import md5, md5_safe, sha1, sha1_safe
-from rhodecode.lib.type_utils import aslist, str2bool
-from functools import reduce
-
-#TODO: there's no longer safe_unicode, we mock it now, but should remove it
-safe_unicode = safe_str
+from rhodecode.lib.type_utils import aslist, str2bool, StrictAttributeDict, AttributeDict
 
 
 def __get_lem(extra_mapping=None):
@@ -85,7 +81,7 @@ def __get_lem(extra_mapping=None):
     for lx, t in sorted(pygments.lexers.LEXERS.items()):
         m = list(map(__clean, t[-2]))
         if m:
-            m = reduce(lambda x, y: x + y, m)
+            m = functools.reduce(lambda x, y: x + y, m)
             for ext in m:
                 desc = lx.replace('Lexer', '')
                 d[ext].append(desc)
@@ -94,7 +90,7 @@ def __get_lem(extra_mapping=None):
 
     extra_mapping = extra_mapping or {}
     if extra_mapping:
-        for k, v in extra_mapping.items():
+        for k, v in list(extra_mapping.items()):
             if k not in data:
                 # register new mapping2lexer
                 data[k] = [v]
@@ -102,7 +98,7 @@ def __get_lem(extra_mapping=None):
     return data
 
 
-def convert_line_endings(line, mode):
+def convert_line_endings(line: str, mode) -> str:
     """
     Converts a given line  "line end" accordingly to given mode
 
@@ -113,7 +109,6 @@ def convert_line_endings(line, mode):
 
     :param line: given line to convert
     :param mode: mode to convert to
-    :rtype: str
     :return: converted line according to mode
     """
     if mode == 0:
@@ -127,14 +122,13 @@ def convert_line_endings(line, mode):
     return line
 
 
-def detect_mode(line, default):
+def detect_mode(line: str, default) -> int:
     """
     Detects line break for given line, if line break couldn't be found
     given default value is returned
 
     :param line: str line
     :param default: default
-    :rtype: int
     :return: value of line end on of 0 - Unix, 1 - Mac, 2 - DOS
     """
     if line.endswith('\r\n'):
@@ -159,14 +153,18 @@ def remove_prefix(s, prefix):
     return s
 
 
-def find_calling_context(ignore_modules=None):
+def find_calling_context(ignore_modules=None, depth=4, output_writer=None, indent=True):
     """
-    Look through the calling stack and return the frame which called
-    this function and is part of core module ( ie. rhodecode.* )
+    Walk up the calling stack and report, via `output_writer`, the frames
+    that called this function (modules in `ignore_modules` are skipped)
 
     :param ignore_modules: list of modules to ignore eg. ['rhodecode.lib']
+    :param depth: maximum number of caller frames to walk up
+    :param output_writer: callable receiving each output line (default: rich.print, else print)
+    :param indent: indent each reported frame by its position; if false use a single space
 
     usage::
+
         from rhodecode.lib.utils2 import find_calling_context
 
         calling_context = find_calling_context(ignore_modules=[
@@ -174,24 +172,37 @@ def find_calling_context(ignore_modules=
             'rhodecode.model.settings',
         ])
 
-        if calling_context:
-            cc_str = 'call context %s:%s' % (
-                calling_context.f_code.co_filename,
-                calling_context.f_lineno,
-            )
-            print(cc_str)
     """
+    import inspect
+    if not output_writer:
+        try:
+            from rich import print as pprint
+        except ImportError:
+            pprint = print
+        output_writer = pprint
 
     ignore_modules = ignore_modules or []
+    frame = inspect.currentframe()
+    cc = []
+    try:
+        for i in range(depth):  # walk up at most `depth` caller frames
+            frame = frame.f_back
+            if not frame:
+                break
 
-    f = sys._getframe(2)
-    while f.f_back is not None:
-        name = f.f_globals.get('__name__')
-        if name and name.startswith(__name__.split('.')[0]):
+            info = inspect.getframeinfo(frame)
+            name = frame.f_globals.get('__name__')
             if name not in ignore_modules:
-                return f
-        f = f.f_back
-    return None
+                cc.insert(0, f'CALL_CONTEXT:{i}: file {info.filename}:{info.lineno} -> {info.function}')
+    finally:
+        # Avoids a reference cycle
+        del frame
+
+    output_writer('* INFO: This code was called from: *')
+    for cnt, frm_info in enumerate(cc):
+        if not indent:
+            cnt = 1
+        output_writer(' ' * cnt + frm_info)
 
 
 def ping_connection(connection, branch):
@@ -252,15 +262,10 @@ def engine_from_config(configuration, pr
                                   parameters, context, executemany):
             setattr(conn, 'query_start_time', time.time())
             log.info(color_sql(">>>>> STARTING QUERY >>>>>"))
-            calling_context = find_calling_context(ignore_modules=[
+            find_calling_context(ignore_modules=[
                 'rhodecode.lib.caching_query',
                 'rhodecode.model.settings',
-            ])
-            if calling_context:
-                log.info(color_sql('call context %s:%s' % (
-                    calling_context.f_code.co_filename,
-                    calling_context.f_lineno,
-                )))
+            ], output_writer=log.info)
 
         def after_cursor_execute(conn, cursor, statement,
                                  parameters, context, executemany):
@@ -272,10 +277,12 @@ def engine_from_config(configuration, pr
     return engine
 
 
-def get_encryption_key(config):
+def get_encryption_key(config) -> bytes:
     secret = config.get('rhodecode.encrypted_values.secret')
     default = config['beaker.session.secret']
-    return secret or default
+    enc_key = secret or default
+
+    return safe_bytes(enc_key)
 
 
 def age(prevdate, now=None, show_short_version=False, show_suffix=True, short_format=False):
@@ -476,7 +483,7 @@ def get_host_info(request):
 
     qualified_home_url = request.route_url('home')
     parsed_url = urlobject.URLObject(qualified_home_url)
-    decoded_path = safe_unicode(urllib.parse.unquote(parsed_url.path.rstrip('/')))
+    decoded_path = safe_str(urllib.parse.unquote(parsed_url.path.rstrip('/')))
 
     return {
         'scheme': parsed_url.scheme,
@@ -488,7 +495,7 @@ def get_host_info(request):
 def get_clone_url(request, uri_tmpl, repo_name, repo_id, repo_type, **override):
     qualified_home_url = request.route_url('home')
     parsed_url = urlobject.URLObject(qualified_home_url)
-    decoded_path = safe_unicode(urllib.parse.unquote(parsed_url.path.rstrip('/')))
+    decoded_path = safe_str(urllib.parse.unquote(parsed_url.path.rstrip('/')))
 
     args = {
         'scheme': parsed_url.scheme,
@@ -505,8 +512,9 @@ def get_clone_url(request, uri_tmpl, rep
     args.update(override)
     args['user'] = urllib.parse.quote(safe_str(args['user']))
 
-    for k, v in args.items():
-        uri_tmpl = uri_tmpl.replace('{%s}' % k, v)
+    for k, v in list(args.items()):
+        tmpl_key = '{%s}' % k
+        uri_tmpl = uri_tmpl.replace(tmpl_key, v)
 
     # special case for SVN clone url
     if repo_type == 'svn':
@@ -516,7 +524,7 @@ def get_clone_url(request, uri_tmpl, rep
     url_obj = urlobject.URLObject(uri_tmpl)
     url = url_obj.with_netloc(url_obj.netloc.lstrip('@'))
 
-    return safe_unicode(url)
+    return safe_str(url)
 
 
 def get_commit_safe(repo, commit_id=None, commit_idx=None, pre_load=None,
@@ -594,36 +602,6 @@ def extract_mentioned_users(s):
     return sorted(list(usrs), key=lambda k: k.lower())
 
 
-class AttributeDictBase(dict):
-    def __getstate__(self):
-        odict = self.__dict__  # get attribute dictionary
-        return odict
-
-    def __setstate__(self, dict):
-        self.__dict__ = dict
-
-    __setattr__ = dict.__setitem__
-    __delattr__ = dict.__delitem__
-
-
-class StrictAttributeDict(AttributeDictBase):
-    """
-    Strict Version of Attribute dict which raises an Attribute error when
-    requested attribute is not set
-    """
-    def __getattr__(self, attr):
-        try:
-            return self[attr]
-        except KeyError:
-            raise AttributeError('%s object has no attribute %s' % (
-                self.__class__, attr))
-
-
-class AttributeDict(AttributeDictBase):
-    def __getattr__(self, attr):
-        return self.get(attr, None)
-
-
 def fix_PATH(os_=None):
     """
     Get current active python path, and append it to PATH variable to fix
@@ -635,19 +613,20 @@ def fix_PATH(os_=None):
         os = os_
 
     cur_path = os.path.split(sys.executable)[0]
-    if not os.environ['PATH'].startswith(cur_path):
-        os.environ['PATH'] = '%s:%s' % (cur_path, os.environ['PATH'])
+    os_path = os.environ['PATH']
+    if not os_path.startswith(cur_path):
+        os.environ['PATH'] = f'{cur_path}:{os_path}'
 
 
 def obfuscate_url_pw(engine):
     _url = engine or ''
     try:
         _url = sqlalchemy.engine.url.make_url(engine)
-        if _url.password:
-            _url.password = 'XXXXX'
+        # repr() of a parsed sqlalchemy URL renders the password masked
+        return repr(_url)
     except Exception:
         pass
     return str(_url)
 
 
 def get_server_url(environ):
@@ -695,6 +672,7 @@ def get_current_rhodecode_user(request=N
     """
     Gets rhodecode user from request
     """
+    import pyramid.threadlocal
     pyramid_request = request or pyramid.threadlocal.get_current_request()
 
     # web case
@@ -837,51 +815,15 @@ class Optional(object):
 
 
 def glob2re(pat):
-    """
-    Translate a shell PATTERN to a regular expression.
-
-    There is no way to quote meta-characters.
-    """
-
-    i, n = 0, len(pat)
-    res = ''
-    while i < n:
-        c = pat[i]
-        i = i+1
-        if c == '*':
-            #res = res + '.*'
-            res = res + '[^/]*'
-        elif c == '?':
-            #res = res + '.'
-            res = res + '[^/]'
-        elif c == '[':
-            j = i
-            if j < n and pat[j] == '!':
-                j = j+1
-            if j < n and pat[j] == ']':
-                j = j+1
-            while j < n and pat[j] != ']':
-                j = j+1
-            if j >= n:
-                res = res + '\\['
-            else:
-                stuff = pat[i:j].replace('\\','\\\\')
-                i = j+1
-                if stuff[0] == '!':
-                    stuff = '^' + stuff[1:]
-                elif stuff[0] == '^':
-                    stuff = '\\' + stuff
-                res = '%s[%s]' % (res, stuff)
-        else:
-            res = res + re.escape(c)
-    return res + '\Z(?ms)'
+    import fnmatch
+    return fnmatch.translate(pat)
 
 
 def parse_byte_string(size_str):
     match = re.match(r'(\d+)(MB|KB)', size_str, re.IGNORECASE)
     if not match:
-        raise ValueError('Given size:%s is invalid, please make sure '
-                         'to use format of <num>(MB|KB)' % size_str)
+        raise ValueError(f'Given size:{size_str} is invalid, please make sure '
+                         f'to use format of <num>(MB|KB)')
 
     _parts = match.groups()
     num, type_ = _parts
@@ -911,7 +853,7 @@ class CachedProperty(object):
         if func_name is None:
             func_name = func.__name__
         self.data = (func, func_name)
-        update_wrapper(self, func)
+        functools.update_wrapper(self, func)
 
     def __get__(self, inst, class_):
         if inst is None:
@@ -921,7 +863,7 @@ class CachedProperty(object):
         value = func(inst)
         inst.__dict__[func_name] = value
         if '_invalidate_prop_cache' not in inst.__dict__:
-            inst.__dict__['_invalidate_prop_cache'] = partial(
+            inst.__dict__['_invalidate_prop_cache'] = functools.partial(
                 self._invalidate_prop_cache, inst)
         return value
 
@@ -967,7 +909,7 @@ def retry(func=None, exception=Exception
     """
 
     if func is None:
-        return partial(
+        return functools.partial(
             retry,
             exception=exception,
             n_tries=n_tries,
@@ -976,7 +918,7 @@ def retry(func=None, exception=Exception
             logger=logger,
         )
 
-    @wraps(func)
+    @functools.wraps(func)
     def wrapper(*args, **kwargs):
         _n_tries, n_delay = n_tries, delay
         log = logging.getLogger('rhodecode.retry')
@@ -1016,7 +958,7 @@ def user_agent_normalizer(user_agent_raw
             parts = ua.split(' ')
             if parts:
                 ua = parts[0]
-            ua = re.sub('\.windows\.\d', '', ua).strip()
+            ua = re.sub(r'\.windows\.\d', '', ua).strip()
 
         return ua
     except Exception: