str_utils.py
187 lines
| 5.2 KiB
| text/x-python
|
PythonLexer
r5088 | # Copyright (C) 2011-2023 RhodeCode GmbH | |||
r4915 | # | |||
# This program is free software: you can redistribute it and/or modify | ||||
# it under the terms of the GNU Affero General Public License, version 3 | ||||
# (only), as published by the Free Software Foundation. | ||||
# | ||||
# This program is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU Affero General Public License | ||||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||||
# | ||||
# This program is dual-licensed. If you wish to learn more about the | ||||
# RhodeCode Enterprise Edition, including its added features, Support services, | ||||
# and proprietary license terms, please see https://rhodecode.com/licenses/ | ||||
r5065 | import typing | |||
import base64 | ||||
r4915 | import logging | |||
r5065 | from unidecode import unidecode | |||
r4915 | import rhodecode | |||
from rhodecode.lib.type_utils import aslist | ||||
r5065 | ||||
r4915 | log = logging.getLogger(__name__) | |||
def safe_int(val, default=None) -> int:
    """
    Returns int() of val; if val is not convertible to int, returns default
    instead.

    :param val: value to convert
    :param default: value returned when the conversion fails
    """
    try:
        return int(val)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised by int() for infinite floats, e.g. int(float('inf'))
        return default
def safe_float(val, default=None) -> float:
    """
    Returns float() of val; if val is not convertible to float, returns default
    instead.

    :param val: value to convert
    :param default: value returned when the conversion fails
    """
    try:
        return float(val)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised by float() for ints too large to represent, e.g. 10**400
        return default
def base64_to_str(text: str | bytes) -> str:
    """
    Base64-encode ``text`` and return the encoded value as a str with
    leading/trailing whitespace stripped.

    :param text: str or bytes to encode
    """
    raw = safe_bytes(text)
    encoded = base64.encodebytes(raw)
    return safe_str(encoded).strip()
def get_default_encodings() -> list[str]:
    """
    Read the ``default_encoding`` entry from ``rhodecode.CONFIG`` (``utf8``
    when the key is absent) and pass it through ``aslist`` with ``,`` as
    the separator.
    """
    configured = rhodecode.CONFIG.get('default_encoding', 'utf8')
    return aslist(configured, sep=',')
# Encodings resolved once, when this module is imported; safe_str() falls back
# to this list when no explicit encoding is passed.
DEFAULT_ENCODINGS = get_default_encodings()
def safe_str(str_, to_encoding=None) -> str:
    """
    Safe str function. Turns ``str_`` into a str, decoding bytes if needed.

    A str is returned unchanged and any non-bytes object goes through
    ``str()``. Bytes are decoded with each candidate encoding in order; if
    all of them fail, the first encoding is used with invalid bytes replaced.

    :param str_: value to convert
    :param to_encoding: encoding name, or list/tuple of names, used to decode
        bytes; DEFAULT_ENCODINGS is used when not given
    """
    if isinstance(str_, str):
        return str_

    # anything that is neither str nor bytes is simply cast to str
    if not isinstance(str_, bytes):
        return str(str_)

    encodings = to_encoding or DEFAULT_ENCODINGS
    if not isinstance(encodings, (list, tuple)):
        encodings = [encodings]

    for candidate in encodings:
        try:
            return str_.decode(candidate)
        except UnicodeDecodeError:
            continue

    # nothing decoded cleanly: fall back to the first encoding, replacing bad bytes
    return str_.decode(encodings[0], 'replace')
def safe_bytes(str_, from_encoding=None) -> bytes:
    """
    Safe bytes function. Tries a list of encodings to turn ``str_`` into bytes.

    Bytes are returned unchanged. A str is encoded with each candidate
    encoding in order and the first successful result is returned; when none
    of them can represent the text, the first encoding is used with
    unencodable characters replaced.

    :param str_: string to encode (bytes are returned as-is)
    :param from_encoding: encoding name, or list/tuple of names, to try;
        the configured default encodings are used when not given
    :raises ValueError: if ``str_`` is neither bytes nor str
    """
    if isinstance(str_, bytes):
        return str_
    if not isinstance(str_, str):
        raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

    from_encoding = from_encoding or get_default_encodings()
    if not isinstance(from_encoding, (list, tuple)):
        from_encoding = [from_encoding]

    for enc in from_encoding:
        try:
            return str_.encode(enc)
        except UnicodeEncodeError:
            # str.encode() reports failure with UnicodeEncodeError (not
            # UnicodeDecodeError); move on to the next candidate encoding
            pass

    return str_.encode(from_encoding[0], 'replace')
def ascii_bytes(str_, allow_bytes=False) -> bytes:
    """
    Plain str -> bytes conversion that assumes str_ contains only ASCII.
    Raises UnicodeError when it does not.

    Use this where encoding and "safe" ambiguity should be avoided: for
    strings that were already encoded some other way but are still unicode
    strings - e.g. hex, base64, json, urlencoding - or that are known to be
    identifiers.

    :param str_: str to convert
    :param allow_bytes: when true, bytes input is returned unchanged
    :raises ValueError: if str_ is not a str (and not allowed bytes)
    """
    if isinstance(str_, str):
        return str_.encode('ascii')
    if allow_bytes and isinstance(str_, bytes):
        return str_
    raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
def ascii_str(str_) -> str:
    """
    Plain bytes -> str conversion that assumes str_ contains only ASCII.
    Raises UnicodeError when it does not.

    Use this where encoding and "safe" ambiguity should be avoided: for
    values that are encoded but otherwise known to be ASCII, where a unicode
    string is wanted without caring about the encoding - e.g. hex, base64,
    urlencoding, or known identifiers.

    :param str_: bytes to convert
    :raises ValueError: if str_ is not bytes
    """
    if isinstance(str_, bytes):
        return str_.decode('ascii')
    raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
r4989 | ||||
def convert_special_chars(str_) -> str:
    """
    Tries to replace non-ascii letters with their ascii representation, e.g.::

        `żołw` converts into `zolw`

    :param str_: value to convert; passed through safe_str first
    """
    return unidecode(safe_str(str_))
r5337 | ||||
def splitnewlines(text: bytes):
    """
    Like splitlines, but splits only on newlines (b'\\n'); each returned
    line keeps its trailing newline.

    :param text: bytes to split
    """
    # every piece except the last was followed by a newline in the input
    *terminated, remainder = text.split(b'\n')
    result = [piece + b'\n' for piece in terminated]
    # a non-empty remainder is a final line without a trailing newline
    if remainder:
        result.append(remainder)
    return result
r5516 | ||||
def header_safe_str(val):
    """
    Convert ``val`` to bytes with safe_bytes, then decode those bytes as
    latin-1 and return the resulting str.

    :param val: str or bytes value
    """
    as_bytes = safe_bytes(val)
    return as_bytes.decode('latin-1', errors='replace')