##// END OF EJS Templates
release: version 5.4.0
release: version 5.4.0

File last commit:

r5647:8333bc7b default
r5665:cdbc80b0 merge v5.4.0 stable
Show More
str_utils.py
184 lines | 5.2 KiB | text/x-python | PythonLexer
core: updated copyright to 2024
r5608 # Copyright (C) 2011-2024 RhodeCode GmbH
core: break down some utils for better imports
r4915 #
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/
core: multiple fixes to unicode vs str usage...
r5065 import typing
import base64
core: break down some utils for better imports
r4915 import logging
core: multiple fixes to unicode vs str usage...
r5065 from unidecode import unidecode
core: break down some utils for better imports
r4915 import rhodecode
from rhodecode.lib.type_utils import aslist
core: multiple fixes to unicode vs str usage...
r5065
core: break down some utils for better imports
r4915 log = logging.getLogger(__name__)
def safe_int(val, default=None) -> int:
    """
    Convert ``val`` with ``int()``, falling back to ``default`` on failure.

    :param val: value handed to ``int()``
    :param default: value returned when ``val`` cannot be converted. With the
        default of ``None`` the function returns ``None`` on failure.
    :return: ``int(val)`` or ``default``
    """
    try:
        return int(val)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() rejects float infinities with a different
        # exception than NaN (ValueError); both are "not convertible".
        return default
core: multiple fixes to unicode vs str usage...
def safe_float(val, default=None) -> float:
    """
    Convert ``val`` with ``float()``, falling back to ``default`` on failure.

    :param val: value handed to ``float()``
    :param default: value returned when ``val`` cannot be converted. With the
        default of ``None`` the function returns ``None`` on failure.
    :return: ``float(val)`` or ``default``
    """
    try:
        return float(val)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: float() raises it for ints too large to be
        # represented as a float, which is also "not convertible".
        return default
cleanups: typing + whitespaces
def base64_to_str(text: str | bytes) -> str:
    """
    Base64-encode ``text`` and return the encoded value as a ``str``.

    Note that, despite the name, the input is *encoded to* base64 here.
    Leading/trailing whitespace of the encoded value is stripped.

    :param text: str or bytes to encode
    """
    raw = safe_bytes(text)
    encoded = base64.encodebytes(raw)
    return safe_str(encoded).strip()
modernize: python3 updates
def get_default_encodings() -> list[str]:
    """
    Read the ``default_encoding`` list from the RhodeCode configuration,
    passing ``'utf8'`` as the ``missing`` value.
    """
    config = rhodecode.ConfigGet()
    return config.get_list('default_encoding', missing='utf8')
core: multiple fixes to unicode vs str usage...
r5065
core: break down some utils for better imports
def safe_str(str_, to_encoding=None) -> str:
    """
    Return ``str_`` as a ``str``.

    ``str`` input is returned unchanged and objects that are not bytes go
    through ``str()``. Bytes are decoded with the first candidate encoding
    that succeeds; when none does, the first candidate is used with the
    ``replace`` error handler.

    :param str_: value to convert
    :param to_encoding: encoding name, or list/tuple of names tried in order;
        when falsy, ``get_default_encodings()`` supplies the candidates
    """
    if isinstance(str_, str):
        return str_

    if not isinstance(str_, bytes):
        # neither str nor bytes: fall back to the object's str() form
        return str(str_)

    candidates = to_encoding or get_default_encodings()
    if not isinstance(candidates, (list, tuple)):
        candidates = [candidates]

    for encoding in candidates:
        try:
            return str(str_, encoding)
        except UnicodeDecodeError:
            continue

    # no candidate decoded cleanly, so decode lossily with the first one
    return str(str_, encoding=candidates[0], errors='replace')
def safe_bytes(str_, from_encoding=None) -> bytes:
    """
    Return ``str_`` as ``bytes``.

    Bytes input is returned unchanged. A ``str`` is encoded with the first
    candidate encoding that succeeds; when none does, the first candidate is
    used with the ``replace`` error handler.

    :param str_: str (or bytes) to convert
    :param from_encoding: encoding name, or list/tuple of names tried in
        order; when falsy, ``get_default_encodings()`` supplies the candidates
    :raises ValueError: when ``str_`` is neither ``bytes`` nor ``str``
    """
    if isinstance(str_, bytes):
        return str_

    if not isinstance(str_, str):
        raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

    candidates = from_encoding or get_default_encodings()
    if not isinstance(candidates, (list, tuple)):
        candidates = [candidates]

    for encoding in candidates:
        try:
            return str_.encode(encoding)
        except (UnicodeDecodeError, UnicodeEncodeError):
            continue

    # no candidate encoded cleanly, so encode lossily with the first one
    return str_.encode(candidates[0], 'replace')
def ascii_bytes(str_, allow_bytes=False) -> bytes:
    """
    Encode a pure-ASCII ``str`` to ``bytes``.

    Intended for values where encoding ambiguity must be avoided: text that
    is already hex, base64, json or url-encoded, or known identifiers.
    Non-ASCII input fails with a UnicodeError instead of being "made safe".

    :param str_: str to encode
    :param allow_bytes: when true, bytes input is returned unchanged
    :raises ValueError: when ``str_`` is not a ``str`` (and is not bytes
        accepted via ``allow_bytes``)
    """
    if isinstance(str_, str):
        return str_.encode('ascii')

    if allow_bytes and isinstance(str_, bytes):
        return str_

    raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
core: multiple fixes to unicode vs str usage...
def ascii_str(str_) -> str:
    """
    Decode pure-ASCII ``bytes`` to ``str``.

    Intended for values where encoding ambiguity must be avoided: data known
    to be ASCII such as hex, base64, url-encoded text or identifiers.
    Non-ASCII input fails with a UnicodeError instead of being "made safe".

    :param str_: bytes to decode
    :raises ValueError: when ``str_`` is not ``bytes``
    """
    if isinstance(str_, bytes):
        return str_.decode('ascii')

    raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
str_utils: added common non-ascii replacer
r4989
core: multiple fixes to unicode vs str usage...
def convert_special_chars(str_) -> str:
    """
    Try to replace non-ascii letters with their ascii representation, eg::

        `żołw` converts into `zolw`

    :param str_: value to convert; passed through ``safe_str`` first
    """
    return unidecode(safe_str(str_))
chore(sync-up): synced libs/settings maker from vcsserver
r5337
def splitnewlines(text: bytes):
    """
    Like splitlines, but split only on newline bytes and keep them.

    Every returned line ends with the newline byte, except a final line
    that had no terminating newline in ``text``. Empty input gives ``[]``.

    :param text: bytes to split
    :return: list of bytes lines
    """
    *complete, remainder = text.split(b'\n')
    result = [chunk + b'\n' for chunk in complete]
    if remainder:
        # trailing data without a terminating newline
        result.append(remainder)
    return result
feat(artifacts): new artifact storage engines allowing an s3 based uploads
r5516
def header_safe_str(val):
    """
    Convert ``val`` to bytes via ``safe_bytes`` and decode them as latin-1.

    :param val: str or bytes value
    :return: the latin-1 decoded ``str``
    """
    raw = safe_bytes(val)
    return raw.decode('latin-1', errors='replace')