# HG changeset patch # User RhodeCode Admin # Date 2023-03-06 21:19:44 # Node ID f87c218ed41895c8caf14ec851a137d5ccb0dfb4 # Parent 49ad81de30dd6905895be1e42212a7c38b00117c core: break down some utils for better imports diff --git a/rhodecode/lib/hash_utils.py b/rhodecode/lib/hash_utils.py new file mode 100644 --- /dev/null +++ b/rhodecode/lib/hash_utils.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2011-2020 RhodeCode GmbH +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License, version 3 +# (only), as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# This program is dual-licensed. If you wish to learn more about the +# RhodeCode Enterprise Edition, including its added features, Support services, +# and proprietary license terms, please see https://rhodecode.com/licenses/ + +import hashlib +from rhodecode.lib.str_utils import safe_bytes + + +def md5(s):  # hex MD5 digest of s; s is passed to hashlib unchanged, so it must be bytes-like + return hashlib.md5(s).hexdigest() + + +def md5_safe(s):  # like md5(), but s goes through safe_bytes() first, so str is accepted too + return md5(safe_bytes(s)) + + +def sha1(s):  # hex SHA-1 digest of s; s is passed to hashlib unchanged, so it must be bytes-like + return hashlib.sha1(s).hexdigest() + + +def sha1_safe(s):  # like sha1(), but s goes through safe_bytes() first, so str is accepted too + return sha1(safe_bytes(s)) diff --git a/rhodecode/lib/str_utils.py b/rhodecode/lib/str_utils.py new file mode 100644 --- /dev/null +++ b/rhodecode/lib/str_utils.py @@ -0,0 +1,135 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2011-2020 RhodeCode GmbH +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License, version 3 +# (only), as published by the Free Software Foundation.
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# This program is dual-licensed. If you wish to learn more about the +# RhodeCode Enterprise Edition, including its added features, Support services, +# and proprietary license terms, please see https://rhodecode.com/licenses/ + +import logging +import rhodecode +from rhodecode.lib.type_utils import aslist + +log = logging.getLogger(__name__) + + +def safe_int(val, default=None) -> int: + """ + Returns int() of val; if val is not convertible to int, default is returned + instead + + :param val: value to convert with int() + :param default: value returned when int(val) raises ValueError or TypeError + """ + + try: + val = int(val) + except (ValueError, TypeError): + val = default + + return val + + +def get_default_encodings(): + return aslist(rhodecode.CONFIG.get('default_encoding', 'utf8'), sep=',') + + +def safe_str(str_, to_encoding=None) -> str: + """ + safe str function. Does a few tricks to turn str_ into a str + + :param str_: object to convert; str is returned as is, bytes are decoded, anything else goes through str() + :param to_encoding: encoding, or list of encodings, tried in order when decoding bytes; defaults to get_default_encodings() + :rtype: str + :returns: str object + """ + if isinstance(str_, str): + return str_ + + # if it's not bytes cast to str + if not isinstance(str_, bytes): + return str(str_) + + to_encoding = to_encoding or get_default_encodings() + if not isinstance(to_encoding, (list, tuple)): + to_encoding = [to_encoding] + + for enc in to_encoding: + try: + return str(str_, enc) + except UnicodeDecodeError: + pass + + return str(str_, to_encoding[0], 'replace') + + +def safe_bytes(str_, from_encoding=None) -> bytes: + """ + safe bytes function.
Does a few tricks to turn str_ into a bytes string: + + :param str_: string to encode; bytes are returned as is, other types raise ValueError + :param from_encoding: encoding, or list of encodings, tried in order; defaults to get_default_encodings() + :rtype: bytes + :returns: bytes object + """ + if isinstance(str_, bytes): + return str_ + + if not isinstance(str_, str): + raise ValueError('safe_bytes cannot convert other types than str: got: {}'.format(type(str_))) + + from_encoding = from_encoding or get_default_encodings() + if not isinstance(from_encoding, (list, tuple)): + from_encoding = [from_encoding] + + for enc in from_encoding: + try: + return str_.encode(enc) + except UnicodeEncodeError: + pass  # str.encode() failed for enc, fall through to the next encoding + + return str_.encode(from_encoding[0], 'replace') + + +def ascii_bytes(str_, allow_bytes=False) -> bytes: + """ + Simple conversion from str to bytes, with assumption that str_ is pure ASCII. + Fails with UnicodeError on invalid input. + This should be used where encoding and "safe" ambiguity should be avoided. + Where strings already have been encoded in other ways but still are unicode + string - for example to hex, base64, json, urlencoding, or are known to be + identifiers. + """ + if allow_bytes and isinstance(str_, bytes): + return str_ + + if not isinstance(str_, str): + raise ValueError('ascii_bytes cannot convert other types than str: got: {}'.format(type(str_))) + return str_.encode('ascii') + + +def ascii_str(str_) -> str: + """ + Simple conversion from bytes to str, with assumption that str_ is pure ASCII. + Fails with UnicodeError on invalid input. + This should be used where encoding and "safe" ambiguity should be avoided. + Where strings are encoded but also in other ways are known to be ASCII, and + where a unicode string is wanted without caring about encoding. For example + to hex, base64, urlencoding, or are known to be identifiers.
+ """ + + if not isinstance(str_, bytes): + raise ValueError('ascii_str cannot convert other types than bytes: got: {}'.format(type(str_))) + return str_.decode('ascii') diff --git a/rhodecode/lib/type_utils.py b/rhodecode/lib/type_utils.py new file mode 100644 --- /dev/null +++ b/rhodecode/lib/type_utils.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2011-2020 RhodeCode GmbH +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License, version 3 +# (only), as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# This program is dual-licensed.
If you wish to learn more about the +# RhodeCode Enterprise Edition, including its added features, Support services, +# and proprietary license terms, please see https://rhodecode.com/licenses/ + +import logging + +log = logging.getLogger(__name__) + + +def str2bool(str_): + """ + returns True/False value from given string, it tries to translate the + string into boolean + + :param str_: value to translate into boolean; None gives False, True/False are returned unchanged + :rtype: boolean + :returns: True when the stripped, lower-cased str() of the value is one of 't', 'true', 'y', 'yes', 'on', '1', otherwise False + """ + if str_ is None: + return False + if str_ in (True, False): + return str_ + str_ = str(str_).strip().lower() + return str_ in ('t', 'true', 'y', 'yes', 'on', '1') + + +def aslist(obj, sep=None, strip=True): + """ + Returns given string separated by sep as list + + :param obj: str to split; a list/tuple is returned as is, None gives [], any other object is wrapped as [obj] + :param sep: separator handed to str.split(); None splits on runs of whitespace + :param strip: when true, strip() is applied to every item produced by the split + """ + if isinstance(obj, str): + lst = obj.split(sep) + if strip: + lst = [v.strip() for v in lst] + return lst + elif isinstance(obj, (list, tuple)): + return obj + elif obj is None: + return [] + else: + return [obj] diff --git a/rhodecode/lib/utils2.py b/rhodecode/lib/utils2.py --- a/rhodecode/lib/utils2.py +++ b/rhodecode/lib/utils2.py @@ -26,7 +26,6 @@ Some simple helper functions import collections import datetime import dateutil.relativedelta -import hashlib import logging import re import sys @@ -52,22 +51,12 @@ from pyramid.settings import asbool import rhodecode from rhodecode.translation import _, _pluralize - - -def md5(s): - return hashlib.md5(s).hexdigest() - +from rhodecode.lib.str_utils import safe_str, safe_int, safe_bytes +from rhodecode.lib.hash_utils import md5, md5_safe, sha1, sha1_safe +from rhodecode.lib.type_utils import aslist, str2bool -def md5_safe(s): - return md5(safe_str(s)) - - -def sha1(s): - return hashlib.sha1(s).hexdigest() - - -def sha1_safe(s): - return sha1(safe_str(s)) +#TODO: there's no longer safe_unicode, we mock it now, but should remove it +safe_unicode = safe_str def __get_lem(extra_mapping=None): @@ -110,44 +99,6 @@ def
__get_lem(extra_mapping=None): return data -def str2bool(_str): - """ - returns True/False value from given string, it tries to translate the - string into boolean - - :param _str: string value to translate into boolean - :rtype: boolean - :returns: boolean from given string - """ - if _str is None: - return False - if _str in (True, False): - return _str - _str = str(_str).strip().lower() - return _str in ('t', 'true', 'y', 'yes', 'on', '1') - - -def aslist(obj, sep=None, strip=True): - """ - Returns given string separated by sep as list - - :param obj: - :param sep: - :param strip: - """ - if isinstance(obj, (basestring,)): - lst = obj.split(sep) - if strip: - lst = [v.strip() for v in lst] - return lst - elif isinstance(obj, (list, tuple)): - return obj - elif obj is None: - return [] - else: - return [obj] - - def convert_line_endings(line, mode): """ Converts a given line "line end" accordingly to given mode @@ -193,115 +144,6 @@ def detect_mode(line, default): return default -def safe_int(val, default=None): - """ - Returns int() of val if val is not convertable to int use default - instead - - :param val: - :param default: - """ - - try: - val = int(val) - except (ValueError, TypeError): - val = default - - return val - - -def safe_unicode(str_, from_encoding=None, use_chardet=False): - """ - safe unicode function. 
Does few trick to turn str_ into unicode - - In case of UnicodeDecode error, we try to return it with encoding detected - by chardet library if it fails fallback to unicode with errors replaced - - :param str_: string to decode - :rtype: unicode - :returns: unicode object - """ - if isinstance(str_, unicode): - return str_ - - if not from_encoding: - DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding', - 'utf8'), sep=',') - from_encoding = DEFAULT_ENCODINGS - - if not isinstance(from_encoding, (list, tuple)): - from_encoding = [from_encoding] - - try: - return unicode(str_) - except UnicodeDecodeError: - pass - - for enc in from_encoding: - try: - return unicode(str_, enc) - except UnicodeDecodeError: - pass - - if use_chardet: - try: - import chardet - encoding = chardet.detect(str_)['encoding'] - if encoding is None: - raise Exception() - return str_.decode(encoding) - except (ImportError, UnicodeDecodeError, Exception): - return unicode(str_, from_encoding[0], 'replace') - else: - return unicode(str_, from_encoding[0], 'replace') - -def safe_str(unicode_, to_encoding=None, use_chardet=False): - """ - safe str function. 
Does few trick to turn unicode_ into string - - In case of UnicodeEncodeError, we try to return it with encoding detected - by chardet library if it fails fallback to string with errors replaced - - :param unicode_: unicode to encode - :rtype: str - :returns: str object - """ - - # if it's not basestr cast to str - if not isinstance(unicode_, str): - return str(unicode_) - - if isinstance(unicode_, str): - return unicode_ - - if not to_encoding: - DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding', - 'utf8'), sep=',') - to_encoding = DEFAULT_ENCODINGS - - if not isinstance(to_encoding, (list, tuple)): - to_encoding = [to_encoding] - - for enc in to_encoding: - try: - return unicode_.encode(enc) - except UnicodeEncodeError: - pass - - if use_chardet: - try: - import chardet - encoding = chardet.detect(unicode_)['encoding'] - if encoding is None: - raise UnicodeEncodeError() - - return unicode_.encode(encoding) - except (ImportError, UnicodeEncodeError): - return unicode_.encode(to_encoding[0], 'replace') - else: - return unicode_.encode(to_encoding[0], 'replace') - - def remove_suffix(s, suffix): if s.endswith(suffix): s = s[:-1 * len(suffix)]