# RhodeCode VCSServer provides access to different vcs backends via network.
# Copyright (C) 2014-2023 RhodeCode GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
import base64
import logging
import typing
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
def safe_int(val, default=None) -> typing.Optional[int]:
    """
    Return ``int(val)``; if ``val`` is not convertible to int return
    ``default`` instead.

    :param val: value to convert, anything ``int()`` accepts
    :param default: value returned unchanged when the conversion fails
    :return: the converted integer, or ``default`` on failure
    """
    try:
        return int(val)
    except (ValueError, TypeError, OverflowError):
        # ValueError: unparsable text or NaN; TypeError: unsupported type
        # (e.g. None); OverflowError: infinite floats such as float('inf').
        return default
def base64_to_str(text) -> str:
    """
    Return the base64 encoding of ``text`` as a str.

    Despite the name, this function *encodes* to base64. ``text`` is first
    turned into bytes with ``safe_bytes`` and the encoded result is turned
    back into str with ``safe_str``.

    :param text: str or bytes to encode
    :return: base64 text with leading/trailing whitespace stripped

    NOTE: ``base64.encodebytes`` emits a newline after every 76 output
    characters; only the surrounding whitespace is stripped here, so long
    inputs still contain embedded newlines.
    """
    encoded = base64.encodebytes(safe_bytes(text))
    return safe_str(encoded).strip()
def get_default_encodings() -> list[str]:
    """
    Return the encodings to try, in order, when a caller does not name one.

    A new list is built on every call, so callers may mutate the result.
    """
    return ['utf8']
def safe_str(str_, to_encoding=None) -> str:
    """
    Safe str function: turn ``str_`` into a str without raising on
    undecodable bytes.

    * str is returned unchanged
    * bytes are decoded with each candidate encoding in turn
    * any other object is converted with ``str()``

    :param str_: value to convert
    :param to_encoding: encoding name, or list/tuple of names, used to
        decode bytes; when falsy ``get_default_encodings()`` is used
    :return: the decoded/converted string
    """
    if isinstance(str_, str):
        return str_

    # neither str nor bytes: plain str() conversion
    if not isinstance(str_, bytes):
        return str(str_)

    to_encoding = to_encoding or get_default_encodings()
    if not isinstance(to_encoding, (list, tuple)):
        to_encoding = [to_encoding]

    for enc in to_encoding:
        try:
            return str(str_, enc)
        except UnicodeDecodeError:
            # not valid in this encoding, try the next candidate
            continue

    # nothing decoded cleanly: decode with the first encoding and
    # substitute the undecodable bytes instead of failing
    return str(str_, to_encoding[0], 'replace')
def safe_bytes(str_, from_encoding=None) -> bytes:
    """
    Safe bytes function: turn ``str_`` into bytes without raising on
    characters an encoding cannot represent.

    * bytes are returned unchanged
    * str is encoded with each candidate encoding in turn

    :param str_: str (or bytes) to convert
    :param from_encoding: encoding name, or list/tuple of names, used to
        encode the string; when falsy ``get_default_encodings()`` is used
    :return: the encoded bytes
    :raises ValueError: if ``str_`` is neither str nor bytes
    """
    if isinstance(str_, bytes):
        return str_

    if not isinstance(str_, str):
        raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

    from_encoding = from_encoding or get_default_encodings()
    if not isinstance(from_encoding, (list, tuple)):
        from_encoding = [from_encoding]

    for enc in from_encoding:
        try:
            return str_.encode(enc)
        except UnicodeError:
            # str.encode() signals failure with UnicodeEncodeError (a
            # UnicodeError subclass); catching only UnicodeDecodeError let
            # the error escape and made the fallbacks below unreachable.
            continue

    # nothing encoded cleanly: encode with the first encoding and
    # substitute the unencodable characters instead of failing
    return str_.encode(from_encoding[0], 'replace')
def ascii_bytes(str_, allow_bytes=False) -> bytes:
    """
    Simple conversion from str to bytes, with assumption that str_ is pure ASCII.
    Fails with UnicodeError on invalid input.

    This should be used where encoding and "safe" ambiguity should be avoided.
    Where strings already have been encoded in other ways but still are unicode
    string - for example to hex, base64, json, urlencoding, or are known to be
    identifiers.

    :param str_: pure-ASCII str to convert
    :param allow_bytes: when true, bytes input is returned unchanged instead
        of being rejected
    :return: ``str_`` encoded as ASCII
    :raises ValueError: if ``str_`` is not a str (and not bytes that were
        let through by ``allow_bytes``)
    :raises UnicodeEncodeError: if ``str_`` contains non-ASCII characters
    """
    if allow_bytes and isinstance(str_, bytes):
        return str_

    if not isinstance(str_, str):
        raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')

    return str_.encode('ascii')
def ascii_str(str_) -> str:
    """
    Simple conversion from bytes to str, with assumption that str_ is pure ASCII.
    Fails with UnicodeError on invalid input.

    This should be used where encoding and "safe" ambiguity should be avoided.
    Where strings are encoded but also in other ways are known to be ASCII, and
    where a unicode string is wanted without caring about encoding. For example
    to hex, base64, urlencoding, or are known to be identifiers.

    :param str_: pure-ASCII bytes to convert
    :return: ``str_`` decoded as ASCII
    :raises ValueError: if ``str_`` is not bytes
    :raises UnicodeDecodeError: if ``str_`` contains non-ASCII bytes
    """
    if not isinstance(str_, bytes):
        raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')

    return str_.decode('ascii')
def convert_to_str(data):
    """
    Recursively convert bytes found in ``data`` to str.

    bytes are converted with ``safe_str``; tuples and lists are rebuilt as a
    plain tuple / list with every item converted the same way. Any other
    value (including dicts and their contents) is returned unchanged.

    :param data: bytes, a (possibly nested) tuple/list, or any other object
    :return: ``data`` with bytes replaced by str
    """
    if isinstance(data, bytes):
        return safe_str(data)
    if isinstance(data, tuple):
        return tuple(convert_to_str(item) for item in data)
    if isinstance(data, list):
        return [convert_to_str(item) for item in data]
    return data
def splitnewlines(text: bytes) -> list[bytes]:
    """
    like splitlines, but only split on newlines.

    Every returned line keeps its terminating ``b'\\n'``; a final line that
    is not newline-terminated is returned as is. Other line separators such
    as ``b'\\r'`` are left untouched inside the lines.

    :param text: bytes to split
    :return: list of lines, empty for empty input
    """
    chunks = text.split(b'\n')
    # split() always yields at least one element; the last one is whatever
    # follows the final newline (empty when text ends with a newline).
    tail = chunks.pop()
    lines = [chunk + b'\n' for chunk in chunks]
    if tail:
        lines.append(tail)
    return lines