diff --git a/rhodecode/lib/hash_utils.py b/rhodecode/lib/hash_utils.py
new file mode 100644
--- /dev/null
+++ b/rhodecode/lib/hash_utils.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2011-2020 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import hashlib
+from rhodecode.lib.str_utils import safe_bytes
+
+
+def md5(s) -> str:  # hex MD5 digest of ``s``; hashlib requires bytes-like input
+ return hashlib.md5(s).hexdigest()
+
+
+def md5_safe(s) -> str:  # like md5(), but passes ``s`` through safe_bytes() first
+ return md5(safe_bytes(s))
+
+
+def sha1(s) -> str:  # hex SHA-1 digest of ``s``; hashlib requires bytes-like input
+ return hashlib.sha1(s).hexdigest()
+
+
+def sha1_safe(s) -> str:  # like sha1(), but passes ``s`` through safe_bytes() first
+ return sha1(safe_bytes(s))
diff --git a/rhodecode/lib/str_utils.py b/rhodecode/lib/str_utils.py
new file mode 100644
--- /dev/null
+++ b/rhodecode/lib/str_utils.py
@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2011-2020 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import logging
+import rhodecode
+from rhodecode.lib.type_utils import aslist
+
+log = logging.getLogger(__name__)
+
+
+def safe_int(val, default=None) -> int:
+    """
+    Convert ``val`` with int(); if that is not possible return ``default``
+    unchanged (so the result is None unless a default was supplied).
+
+    :param val: value to convert
+    :param default: fallback returned when int(val) fails
+    """
+
+    try:
+        return int(val)
+    except (ValueError, TypeError):
+        # ValueError: malformed text such as 'abc';
+        # TypeError: unsupported types such as None
+        return default
+
+
+def get_default_encodings():  # 'default_encoding' config value split on ',' ('utf8' if unset)
+ return aslist(rhodecode.CONFIG.get('default_encoding', 'utf8'), sep=',')
+
+
+def safe_str(str_, to_encoding=None) -> str:
+    """
+    Coerce ``str_`` to str: bytes are decoded, anything else goes through str().
+
+    :param str_: value to convert; a str is returned unchanged
+    :param to_encoding: encoding, or list/tuple of encodings, tried in order
+        for bytes input; defaults to get_default_encodings()
+    :returns: str; if no encoding fits, the first one is used with 'replace'
+    """
+    # objects that are neither str nor bytes are simply rendered with str()
+    if not isinstance(str_, (str, bytes)):
+        return str(str_)
+    if isinstance(str_, str):
+        return str_
+
+    encodings = to_encoding or get_default_encodings()
+    if not isinstance(encodings, (list, tuple)):
+        encodings = [encodings]
+
+    for candidate in encodings:
+        try:
+            return str_.decode(candidate)
+        except UnicodeDecodeError:
+            continue
+
+    # nothing decoded cleanly: decode lossily with the first candidate
+    return str_.decode(encodings[0], 'replace')
+
+
+def safe_bytes(str_, from_encoding=None) -> bytes:
+    """
+    Encode ``str_`` to bytes, trying each candidate encoding in order.
+
+    :param str_: str to encode; bytes are returned unchanged
+    :param from_encoding: encoding(s) to try; defaults to get_default_encodings()
+    :returns: bytes; if no encoding fits, the first one is used with 'replace'
+    :raises ValueError: if ``str_`` is neither bytes nor str
+    """
+    if isinstance(str_, bytes):
+        return str_
+
+    if not isinstance(str_, str):
+        raise ValueError('safe_bytes cannot convert other types than str: got: {}'.format(type(str_)))
+
+    from_encoding = from_encoding or get_default_encodings()
+    if not isinstance(from_encoding, (list, tuple)):
+        from_encoding = [from_encoding]
+
+    for enc in from_encoding:
+        try:
+            return str_.encode(enc)
+        except UnicodeEncodeError:
+            pass  # str.encode() never raises UnicodeDecodeError; try the next one
+
+    return str_.encode(from_encoding[0], 'replace')
+
+
+def ascii_bytes(str_, allow_bytes=False) -> bytes:
+    """
+    Encode a str that is known to be pure ASCII into bytes, without the encoding
+    guessing of the "safe" helpers. Meant for text already encoded in other ways
+    (hex, base64, json, urlencoding) or known to be an identifier.
+
+    :param allow_bytes: when true, bytes input is returned unchanged
+    :raises ValueError: for other non-str input; UnicodeError for non-ASCII text
+    """
+    if isinstance(str_, bytes) and allow_bytes:
+        return str_
+    if isinstance(str_, str):
+        return str_.encode('ascii')
+
+    raise ValueError('ascii_bytes cannot convert other types than str: got: {}'.format(type(str_)))
+
+
+def ascii_str(str_):
+    """
+    Decode bytes that are known to be pure ASCII into str, without the encoding
+    guessing of the "safe" helpers (e.g. hex, base64, urlencoding, identifiers).
+
+    :param str_: bytes to decode
+    :raises ValueError: when str_ is not bytes
+    :raises UnicodeError: when str_ contains non-ASCII bytes
+    """
+    if isinstance(str_, bytes):
+        return str_.decode('ascii')
+
+    raise ValueError('ascii_str cannot convert other types than bytes: got: {}'.format(type(str_)))
diff --git a/rhodecode/lib/type_utils.py b/rhodecode/lib/type_utils.py
new file mode 100644
--- /dev/null
+++ b/rhodecode/lib/type_utils.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2011-2020 RhodeCode GmbH
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3
+# (only), as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# This program is dual-licensed. If you wish to learn more about the
+# RhodeCode Enterprise Edition, including its added features, Support services,
+# and proprietary license terms, please see https://rhodecode.com/licenses/
+
+import logging
+
+log = logging.getLogger(__name__)
+
+
+def str2bool(str_):
+    """
+    Translate a string (or bool-like value) into a real ``bool``.
+
+    :param str_: value to translate; None gives False, values equal to
+        True/False (including 1 and 0) are normalised to the matching bool
+    :rtype: boolean
+    :returns: for text, True only for 't', 'true', 'y', 'yes', 'on', '1'
+    """
+    if str_ is None:
+        return False
+    if str_ in (True, False):
+        # ``1 in (True, False)`` holds too: coerce so callers always get a bool
+        return bool(str_)
+    return str(str_).strip().lower() in ('t', 'true', 'y', 'yes', 'on', '1')
+
+
+def aslist(obj, sep=None, strip=True):
+    """
+    Normalise ``obj`` into a sequence of items.
+
+    :param obj: str is split on ``sep``; list/tuple is returned unchanged;
+        None gives []; any other object is wrapped as [obj]
+    :param sep: separator passed to str.split()
+    :param strip: strip surrounding whitespace from each item of a split str
+    """
+    if obj is None:
+        return []
+    if isinstance(obj, (list, tuple)):
+        return obj
+    if not isinstance(obj, str):
+        return [obj]
+
+    # only str input reaches this point and gets split
+    parts = obj.split(sep)
+    return [part.strip() for part in parts] if strip else parts
diff --git a/rhodecode/lib/utils2.py b/rhodecode/lib/utils2.py
--- a/rhodecode/lib/utils2.py
+++ b/rhodecode/lib/utils2.py
@@ -26,7 +26,6 @@ Some simple helper functions
import collections
import datetime
import dateutil.relativedelta
-import hashlib
import logging
import re
import sys
@@ -52,22 +51,12 @@ from pyramid.settings import asbool
import rhodecode
from rhodecode.translation import _, _pluralize
-
-
-def md5(s):
- return hashlib.md5(s).hexdigest()
-
+from rhodecode.lib.str_utils import safe_str, safe_int, safe_bytes
+from rhodecode.lib.hash_utils import md5, md5_safe, sha1, sha1_safe
+from rhodecode.lib.type_utils import aslist, str2bool
-def md5_safe(s):
- return md5(safe_str(s))
-
-
-def sha1(s):
- return hashlib.sha1(s).hexdigest()
-
-
-def sha1_safe(s):
- return sha1(safe_str(s))
+# TODO: safe_unicode no longer exists; it is aliased to safe_str for now and should be removed
+safe_unicode = safe_str
def __get_lem(extra_mapping=None):
@@ -110,44 +99,6 @@ def __get_lem(extra_mapping=None):
return data
-def str2bool(_str):
- """
- returns True/False value from given string, it tries to translate the
- string into boolean
-
- :param _str: string value to translate into boolean
- :rtype: boolean
- :returns: boolean from given string
- """
- if _str is None:
- return False
- if _str in (True, False):
- return _str
- _str = str(_str).strip().lower()
- return _str in ('t', 'true', 'y', 'yes', 'on', '1')
-
-
-def aslist(obj, sep=None, strip=True):
- """
- Returns given string separated by sep as list
-
- :param obj:
- :param sep:
- :param strip:
- """
- if isinstance(obj, (basestring,)):
- lst = obj.split(sep)
- if strip:
- lst = [v.strip() for v in lst]
- return lst
- elif isinstance(obj, (list, tuple)):
- return obj
- elif obj is None:
- return []
- else:
- return [obj]
-
-
def convert_line_endings(line, mode):
"""
Converts a given line "line end" accordingly to given mode
@@ -193,115 +144,6 @@ def detect_mode(line, default):
return default
-def safe_int(val, default=None):
- """
- Returns int() of val if val is not convertable to int use default
- instead
-
- :param val:
- :param default:
- """
-
- try:
- val = int(val)
- except (ValueError, TypeError):
- val = default
-
- return val
-
-
-def safe_unicode(str_, from_encoding=None, use_chardet=False):
- """
- safe unicode function. Does few trick to turn str_ into unicode
-
- In case of UnicodeDecode error, we try to return it with encoding detected
- by chardet library if it fails fallback to unicode with errors replaced
-
- :param str_: string to decode
- :rtype: unicode
- :returns: unicode object
- """
- if isinstance(str_, unicode):
- return str_
-
- if not from_encoding:
- DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
- 'utf8'), sep=',')
- from_encoding = DEFAULT_ENCODINGS
-
- if not isinstance(from_encoding, (list, tuple)):
- from_encoding = [from_encoding]
-
- try:
- return unicode(str_)
- except UnicodeDecodeError:
- pass
-
- for enc in from_encoding:
- try:
- return unicode(str_, enc)
- except UnicodeDecodeError:
- pass
-
- if use_chardet:
- try:
- import chardet
- encoding = chardet.detect(str_)['encoding']
- if encoding is None:
- raise Exception()
- return str_.decode(encoding)
- except (ImportError, UnicodeDecodeError, Exception):
- return unicode(str_, from_encoding[0], 'replace')
- else:
- return unicode(str_, from_encoding[0], 'replace')
-
-def safe_str(unicode_, to_encoding=None, use_chardet=False):
- """
- safe str function. Does few trick to turn unicode_ into string
-
- In case of UnicodeEncodeError, we try to return it with encoding detected
- by chardet library if it fails fallback to string with errors replaced
-
- :param unicode_: unicode to encode
- :rtype: str
- :returns: str object
- """
-
- # if it's not basestr cast to str
- if not isinstance(unicode_, str):
- return str(unicode_)
-
- if isinstance(unicode_, str):
- return unicode_
-
- if not to_encoding:
- DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
- 'utf8'), sep=',')
- to_encoding = DEFAULT_ENCODINGS
-
- if not isinstance(to_encoding, (list, tuple)):
- to_encoding = [to_encoding]
-
- for enc in to_encoding:
- try:
- return unicode_.encode(enc)
- except UnicodeEncodeError:
- pass
-
- if use_chardet:
- try:
- import chardet
- encoding = chardet.detect(unicode_)['encoding']
- if encoding is None:
- raise UnicodeEncodeError()
-
- return unicode_.encode(encoding)
- except (ImportError, UnicodeEncodeError):
- return unicode_.encode(to_encoding[0], 'replace')
- else:
- return unicode_.encode(to_encoding[0], 'replace')
-
-
def remove_suffix(s, suffix):
if s.endswith(suffix):
s = s[:-1 * len(suffix)]