|
|
# Copyright (C) 2011-2024 RhodeCode GmbH
|
|
|
#
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
# it under the terms of the GNU Affero General Public License, version 3
|
|
|
# (only), as published by the Free Software Foundation.
|
|
|
#
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
# GNU General Public License for more details.
|
|
|
#
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
#
|
|
|
# This program is dual-licensed. If you wish to learn more about the
|
|
|
# RhodeCode Enterprise Edition, including its added features, Support services,
|
|
|
# and proprietary license terms, please see https://rhodecode.com/licenses/
|
|
|
|
|
|
import typing
|
|
|
import base64
|
|
|
import logging
|
|
|
from unidecode import unidecode
|
|
|
|
|
|
import rhodecode
|
|
|
from rhodecode.lib.type_utils import aslist
|
|
|
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
def safe_int(val, default=None) -> int:
    """
    Convert ``val`` to ``int``, falling back to ``default`` when that fails.

    The fallback is used whenever ``int()`` raises ``ValueError`` (e.g. a
    non-numeric string or ``float('nan')``), ``TypeError`` (e.g. ``None``)
    or ``OverflowError`` (e.g. ``float('inf')``).

    :param val: value to convert
    :param default: returned unchanged when ``val`` cannot be converted;
        it is ``None`` unless the caller passes something else, so the
        result is only guaranteed to be an ``int`` if ``default`` is one
    :return: ``int(val)`` on success, otherwise ``default``
    """
    try:
        return int(val)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is what int() raises for infinite floats; a "safe"
        # conversion must not let it escape.
        return default
|
|
|
|
|
|
|
|
|
def safe_float(val, default=None) -> float:
    """
    Convert ``val`` to ``float``, falling back to ``default`` when that fails.

    The fallback is used whenever ``float()`` raises ``ValueError`` (e.g. a
    non-numeric string), ``TypeError`` (e.g. ``None``) or ``OverflowError``
    (an ``int`` too large to be represented as a float).

    :param val: value to convert
    :param default: returned unchanged when ``val`` cannot be converted;
        it is ``None`` unless the caller passes something else, so the
        result is only guaranteed to be a ``float`` if ``default`` is one
    :return: ``float(val)`` on success, otherwise ``default``
    """
    try:
        return float(val)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is what float() raises for huge integers; a "safe"
        # conversion must not let it escape.
        return default
|
|
|
|
|
|
|
|
|
def base64_to_str(text: str | bytes) -> str:
    """
    Base64-encode ``text`` and return the encoded form as ``str``.

    Despite the name, this function *encodes to* base64. The input is first
    converted with ``safe_bytes``, encoded with ``base64.encodebytes`` and the
    result is converted back with ``safe_str``; surrounding whitespace
    (including the trailing newline added by ``encodebytes``) is stripped.

    :param text: data to encode
    """
    raw = safe_bytes(text)
    encoded = base64.encodebytes(raw)
    return safe_str(encoded).strip()
|
|
|
|
|
|
|
|
|
def get_default_encodings() -> list[str]:
    """
    Return the ``default_encoding`` setting as a list.

    The value is read through ``rhodecode.ConfigGet().get_list`` with
    ``'utf8'`` passed as the ``missing`` argument (presumably the fallback
    used when the setting is absent -- confirm against ``ConfigGet``).
    """
    config = rhodecode.ConfigGet()
    return config.get_list('default_encoding', missing='utf8')
|
|
|
|
|
|
|
|
|
def safe_str(str_, to_encoding=None) -> str:
    """
    Best-effort conversion of ``str_`` into ``str``.

    * ``str`` input is returned unchanged.
    * ``bytes`` input is decoded with each candidate encoding in turn; the
      first one that decodes without ``UnicodeDecodeError`` wins. If none
      does, the first candidate is used with the ``'replace'`` error handler.
    * anything else is passed through ``str()``.

    :param str_: value to convert
    :param to_encoding: a single encoding name or a list/tuple of names to
        try; when falsy, ``get_default_encodings()`` is used
    """
    if isinstance(str_, str):
        return str_

    if isinstance(str_, bytes):
        candidates = to_encoding or get_default_encodings()
        if not isinstance(candidates, (list, tuple)):
            candidates = [candidates]

        for codec in candidates:
            try:
                return str(str_, codec)
            except UnicodeDecodeError:
                continue

        # nothing decoded cleanly: fall back to the first encoding, lossy
        return str(str_, candidates[0], 'replace')

    # neither str nor bytes: rely on the object's own string conversion
    return str(str_)
|
|
|
|
|
|
|
|
|
def safe_bytes(str_, from_encoding=None) -> bytes:
    """
    Best-effort conversion of ``str_`` into ``bytes``.

    * ``bytes`` input is returned unchanged.
    * ``str`` input is encoded with each candidate encoding in turn; the
      first one that encodes without a unicode error wins. If none does,
      the first candidate is used with the ``'replace'`` error handler.

    :param str_: ``str`` (or ``bytes``) to convert
    :param from_encoding: a single encoding name or a list/tuple of names to
        try; when falsy, ``get_default_encodings()`` is used
    :raises ValueError: if ``str_`` is neither ``bytes`` nor ``str``
    """
    if isinstance(str_, bytes):
        return str_

    if isinstance(str_, str):
        candidates = from_encoding or get_default_encodings()
        if not isinstance(candidates, (list, tuple)):
            candidates = [candidates]

        for codec in candidates:
            try:
                return str_.encode(codec)
            except (UnicodeDecodeError, UnicodeEncodeError):
                continue

        # nothing encoded cleanly: fall back to the first encoding, lossy
        return str_.encode(candidates[0], 'replace')

    raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')
|
|
|
|
|
|
|
|
|
def ascii_bytes(str_, allow_bytes=False) -> bytes:
    """
    Strictly encode a pure-ASCII ``str`` into ``bytes``.

    Use this where the "safe" guessing of encodings must be avoided: for
    text that is already known to be ASCII because it was produced by hex,
    base64, json or url encoding, or because it is an identifier.

    :param str_: ``str`` to encode
    :param allow_bytes: when true, ``bytes`` input is returned unchanged
        instead of being rejected
    :raises ValueError: if ``str_`` is not a ``str`` (and not an allowed
        ``bytes``)
    :raises UnicodeError: if ``str_`` contains non-ASCII characters
    """
    if isinstance(str_, str):
        return str_.encode('ascii')

    if allow_bytes and isinstance(str_, bytes):
        return str_

    raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
|
|
|
|
|
|
|
|
|
def ascii_str(str_) -> str:
    """
    Strictly decode pure-ASCII ``bytes`` into ``str``.

    Use this where the "safe" guessing of encodings must be avoided: for
    data that is known to be ASCII (hex, base64, url-encoded values,
    identifiers) and a ``str`` is wanted without caring about encodings.

    :param str_: ``bytes`` to decode
    :raises ValueError: if ``str_`` is not ``bytes``
    :raises UnicodeError: if ``str_`` contains non-ASCII bytes
    """
    if isinstance(str_, bytes):
        return str_.decode('ascii')

    raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
|
|
|
|
|
|
|
|
|
def convert_special_chars(str_) -> str:
    """
    Try to replace non-ASCII letters with their ASCII representation, eg::

        `żołw` converts into `zolw`

    The input is first normalised with ``safe_str`` and then passed to
    ``unidecode``.

    :param str_: value to transliterate
    """
    return unidecode(safe_str(str_))
|
|
|
|
|
|
|
|
|
def splitnewlines(text: bytes):
    """
    Like ``splitlines(keepends=True)``, but split only on ``b'\\n'``.

    Every returned line keeps its terminating newline; a final line without
    a newline is returned as-is, and no empty trailing element is produced
    when ``text`` ends with a newline (or is empty).

    :param text: bytes to split
    :return: list of lines
    """
    pieces = text.split(b'\n')
    # everything after the last newline; empty when text ends with one
    remainder = pieces.pop()

    result = [piece + b'\n' for piece in pieces]
    if remainder:
        result.append(remainder)
    return result
|
|
|
|
|
|
|
|
|
def header_safe_str(val):
    """
    Return ``val`` as a ``str`` whose characters map 1:1 to latin-1 bytes.

    ``val`` is converted to bytes with ``safe_bytes`` and those bytes are
    then decoded as ``latin-1`` (with the ``'replace'`` error handler).

    :param val: ``str`` or ``bytes`` value, as accepted by ``safe_bytes``
    """
    raw = safe_bytes(val)
    return raw.decode('latin-1', errors='replace')
|
|
|
|