# HG changeset patch
# User RhodeCode Admin <admin@rhodecode.com>
# Date 2023-03-06 21:19:44
# Node ID f87c218ed41895c8caf14ec851a137d5ccb0dfb4
# Parent  49ad81de30dd6905895be1e42212a7c38b00117c

core: break down some utils for better imports

diff --git a/rhodecode/lib/hash_utils.py b/rhodecode/lib/hash_utils.py
new file mode 100644
--- /dev/null
+++ b/rhodecode/lib/hash_utils.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2011-2020 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import hashlib
+from rhodecode.lib.str_utils import safe_bytes
+
+
+def md5(s):  # hex digest string of hashlib.md5() over ``s``
+    return hashlib.md5(s).hexdigest()
+
+
+def md5_safe(s):  # same as md5(), but ``s`` goes through safe_bytes() first
+    return md5(safe_bytes(s))
+
+
+def sha1(s):  # hex digest string of hashlib.sha1() over ``s``
+    return hashlib.sha1(s).hexdigest()
+
+
+def sha1_safe(s):  # same as sha1(), but ``s`` goes through safe_bytes() first
+    return sha1(safe_bytes(s))
diff --git a/rhodecode/lib/str_utils.py b/rhodecode/lib/str_utils.py
new file mode 100644
--- /dev/null
+++ b/rhodecode/lib/str_utils.py
@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2011-2020 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import logging
+import rhodecode
+from rhodecode.lib.type_utils import aslist
+
+log = logging.getLogger(__name__)
+
+
+def safe_int(val, default=None) -> int:
+    """
+    Convert ``val`` with int(), falling back to ``default``
+
+    :param val: value handed to int()
+    :param default: returned when int(val) raises ValueError or TypeError
+    :returns: int(val), or ``default`` if the conversion fails
+    """
+    try:
+        converted = int(val)
+    except (ValueError, TypeError):
+        # unparsable strings and unsupported types (e.g. None) land here
+        return default
+
+    return converted
+
+
+def get_default_encodings():  # 'default_encoding' config value (fallback 'utf8') split on ','
+    return aslist(rhodecode.CONFIG.get('default_encoding', 'utf8'), sep=',')
+
+
+def safe_str(str_, to_encoding=None) -> str:
+    """
+    Return ``str_`` as str, decoding bytes with the first encoding that works
+
+    :param str_: value to convert; str is returned as-is, non-bytes go via str()
+    :param to_encoding: encoding or list of encodings, default from config
+    :rtype: str
+    :returns: str object, undecodable bytes are replaced as a last resort
+    """
+    if isinstance(str_, str):
+        return str_
+
+    # anything that is neither str nor bytes gets its plain str() form
+    if not isinstance(str_, bytes):
+        return str(str_)
+
+    encodings = to_encoding or get_default_encodings()
+    if not isinstance(encodings, (list, tuple)):
+        encodings = [encodings]
+
+    for candidate in encodings:
+        try:
+            return str_.decode(candidate)
+        except UnicodeDecodeError:
+            continue
+
+    return str_.decode(encodings[0], 'replace')
+
+
+def safe_bytes(str_, from_encoding=None) -> bytes:
+    """
+    Return ``str_`` as bytes, encoding str with the first encoding that works
+
+    :param str_: bytes (returned as-is) or str to encode, else ValueError
+    :param from_encoding: encoding or list of encodings, default from config
+    :rtype: bytes
+    :returns: bytes object, unencodable characters are replaced as last resort
+    """
+    if isinstance(str_, bytes):
+        return str_
+
+    if not isinstance(str_, str):
+        raise ValueError('safe_bytes cannot convert other types than str: got: {}'.format(type(str_)))
+
+    from_encoding = from_encoding or get_default_encodings()
+    if not isinstance(from_encoding, (list, tuple)):
+        from_encoding = [from_encoding]
+
+    for enc in from_encoding:
+        try:
+            return str_.encode(enc)
+        except UnicodeEncodeError:  # str.encode() fails with Encode, not Decode
+            pass
+
+    return str_.encode(from_encoding[0], 'replace')
+
+
+def ascii_bytes(str_, allow_bytes=False) -> bytes:
+    """
+    Encode a str that is known to be pure ASCII into bytes.
+    Raises UnicodeError on non-ASCII input and ValueError on non-str input.
+    With allow_bytes=True a bytes value is passed through unchanged.
+    Meant for places where encoding and "safe" ambiguity should be avoided:
+    text that was already encoded in another way but is still a unicode
+    string - for example hex, base64, json, urlencoding - or identifiers.
+    """
+    if isinstance(str_, bytes) and allow_bytes:
+        return str_
+
+    if isinstance(str_, str):
+        return str_.encode('ascii')
+    raise ValueError('ascii_bytes cannot convert other types than str: got: {}'.format(type(str_)))
+
+
+def ascii_str(str_):
+    """
+    Decode bytes that are known to be pure ASCII into a str.
+    Raises UnicodeError on non-ASCII input and ValueError on non-bytes input.
+    Meant for places where encoding and "safe" ambiguity should be avoided:
+    data that is encoded but otherwise known to be ASCII, where a unicode
+    string is wanted without caring about encoding - for example hex,
+    base64, urlencoding, or identifiers.
+    """
+
+    if isinstance(str_, bytes):
+        return str_.decode('ascii')
+    raise ValueError('ascii_str cannot convert other types than bytes: got: {}'.format(type(str_)))
diff --git a/rhodecode/lib/type_utils.py b/rhodecode/lib/type_utils.py
new file mode 100644
--- /dev/null
+++ b/rhodecode/lib/type_utils.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2011-2020 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import logging
+
+log = logging.getLogger(__name__)
+
+
+def str2bool(str_):
+    """
+    Translate a value into a boolean: its stripped, lower-cased str() form is
+    checked against 't', 'true', 'y', 'yes', 'on' and '1'; None gives False.
+
+    :param str_: string value to translate into boolean
+    :rtype: boolean
+    :returns: boolean from given string
+    """
+    if str_ is None:
+        return False
+    if str_ in (True, False):
+        return bool(str_)  # 1/0 compare equal to True/False, normalize to bool
+    str_ = str(str_).strip().lower()
+    return str_ in ('t', 'true', 'y', 'yes', 'on', '1')
+
+
+def aslist(obj, sep=None, strip=True):
+    """
+    Normalize ``obj`` into a list (list/tuple input is returned unchanged)
+
+    :param obj: str is split on sep, list/tuple is returned as-is,
+        None gives [], anything else is wrapped into a one-element list
+    :param sep: separator handed to str.split(), None means whitespace
+    :param strip: when true, strip() each item of a split string
+    """
+    if obj is None:
+        return []
+    if isinstance(obj, (list, tuple)):
+        return obj
+    if not isinstance(obj, str):
+        return [obj]
+    parts = obj.split(sep)
+    if strip:
+        return [part.strip() for part in parts]
+    return parts
diff --git a/rhodecode/lib/utils2.py b/rhodecode/lib/utils2.py
--- a/rhodecode/lib/utils2.py
+++ b/rhodecode/lib/utils2.py
@@ -26,7 +26,6 @@ Some simple helper functions
 import collections
 import datetime
 import dateutil.relativedelta
-import hashlib
 import logging
 import re
 import sys
@@ -52,22 +51,12 @@ from pyramid.settings import asbool
 
 import rhodecode
 from rhodecode.translation import _, _pluralize
-
-
-def md5(s):
-    return hashlib.md5(s).hexdigest()
-
+from rhodecode.lib.str_utils import safe_str, safe_int, safe_bytes
+from rhodecode.lib.hash_utils import md5, md5_safe, sha1, sha1_safe
+from rhodecode.lib.type_utils import aslist, str2bool
 
-def md5_safe(s):
-    return md5(safe_str(s))
-
-
-def sha1(s):
-    return hashlib.sha1(s).hexdigest()
-
-
-def sha1_safe(s):
-    return sha1(safe_str(s))
+# TODO: safe_unicode no longer exists; it is aliased to safe_str for now and should be removed
+safe_unicode = safe_str
 
 
 def __get_lem(extra_mapping=None):
@@ -110,44 +99,6 @@ def __get_lem(extra_mapping=None):
     return data
 
 
-def str2bool(_str):
-    """
-    returns True/False value from given string, it tries to translate the
-    string into boolean
-
-    :param _str: string value to translate into boolean
-    :rtype: boolean
-    :returns: boolean from given string
-    """
-    if _str is None:
-        return False
-    if _str in (True, False):
-        return _str
-    _str = str(_str).strip().lower()
-    return _str in ('t', 'true', 'y', 'yes', 'on', '1')
-
-
-def aslist(obj, sep=None, strip=True):
-    """
-    Returns given string separated by sep as list
-
-    :param obj:
-    :param sep:
-    :param strip:
-    """
-    if isinstance(obj, (basestring,)):
-        lst = obj.split(sep)
-        if strip:
-            lst = [v.strip() for v in lst]
-        return lst
-    elif isinstance(obj, (list, tuple)):
-        return obj
-    elif obj is None:
-        return []
-    else:
-        return [obj]
-
-
 def convert_line_endings(line, mode):
     """
     Converts a given line  "line end" accordingly to given mode
@@ -193,115 +144,6 @@ def detect_mode(line, default):
         return default
 
 
-def safe_int(val, default=None):
-    """
-    Returns int() of val if val is not convertable to int use default
-    instead
-
-    :param val:
-    :param default:
-    """
-
-    try:
-        val = int(val)
-    except (ValueError, TypeError):
-        val = default
-
-    return val
-
-
-def safe_unicode(str_, from_encoding=None, use_chardet=False):
-    """
-    safe unicode function. Does few trick to turn str_ into unicode
-
-    In case of UnicodeDecode error, we try to return it with encoding detected
-    by chardet library if it fails fallback to unicode with errors replaced
-
-    :param str_: string to decode
-    :rtype: unicode
-    :returns: unicode object
-    """
-    if isinstance(str_, unicode):
-        return str_
-
-    if not from_encoding:
-        DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
-                                                        'utf8'), sep=',')
-        from_encoding = DEFAULT_ENCODINGS
-
-    if not isinstance(from_encoding, (list, tuple)):
-        from_encoding = [from_encoding]
-
-    try:
-        return unicode(str_)
-    except UnicodeDecodeError:
-        pass
-
-    for enc in from_encoding:
-        try:
-            return unicode(str_, enc)
-        except UnicodeDecodeError:
-            pass
-
-    if use_chardet:
-        try:
-            import chardet
-            encoding = chardet.detect(str_)['encoding']
-            if encoding is None:
-                raise Exception()
-            return str_.decode(encoding)
-        except (ImportError, UnicodeDecodeError, Exception):
-            return unicode(str_, from_encoding[0], 'replace')
-    else:
-        return unicode(str_, from_encoding[0], 'replace')
-
-def safe_str(unicode_, to_encoding=None, use_chardet=False):
-    """
-    safe str function. Does few trick to turn unicode_ into string
-
-    In case of UnicodeEncodeError, we try to return it with encoding detected
-    by chardet library if it fails fallback to string with errors replaced
-
-    :param unicode_: unicode to encode
-    :rtype: str
-    :returns: str object
-    """
-
-    # if it's not basestr cast to str
-    if not isinstance(unicode_, str):
-        return str(unicode_)
-
-    if isinstance(unicode_, str):
-        return unicode_
-
-    if not to_encoding:
-        DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
-                                                        'utf8'), sep=',')
-        to_encoding = DEFAULT_ENCODINGS
-
-    if not isinstance(to_encoding, (list, tuple)):
-        to_encoding = [to_encoding]
-
-    for enc in to_encoding:
-        try:
-            return unicode_.encode(enc)
-        except UnicodeEncodeError:
-            pass
-
-    if use_chardet:
-        try:
-            import chardet
-            encoding = chardet.detect(unicode_)['encoding']
-            if encoding is None:
-                raise UnicodeEncodeError()
-
-            return unicode_.encode(encoding)
-        except (ImportError, UnicodeEncodeError):
-            return unicode_.encode(to_encoding[0], 'replace')
-    else:
-        return unicode_.encode(to_encoding[0], 'replace')
-
-
 def remove_suffix(s, suffix):
     if s.endswith(suffix):
         s = s[:-1 * len(suffix)]