str_utils.py
157 lines
| 4.6 KiB
| text/x-python
|
PythonLexer
/ vcsserver / str_utils.py
# RhodeCode VCSServer provides access to different vcs backends via network.
# Copyright (C) 2014-2023 RhodeCode GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
r1105 | import typing | ||
import base64 | |||
r1060 | import logging | ||
log = logging.getLogger(__name__) | |||
def safe_int(val, default=None) -> int:
    """
    Returns int() of val; if val is not convertible to int, returns default
    instead.

    :param val: value to convert, anything accepted by ``int()``
    :param default: value returned unchanged when the conversion fails
        (``None`` unless the caller supplies one)
    """
    try:
        return int(val)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers infinite floats: int(float('inf')) does not
        # raise ValueError, yet the value is just as "not convertible".
        return default
def base64_to_str(text) -> str:
    """
    Base64-encode ``text`` and return the encoded value as a str.

    The input is first passed through safe_bytes(), the encoded bytes are
    turned back into text with safe_str(), and leading/trailing whitespace
    (such as the final newline added by ``base64.encodebytes``) is stripped.

    :param text: value to encode; must be acceptable to safe_bytes()
    """
    raw = safe_bytes(text)
    encoded = base64.encodebytes(raw)
    return safe_str(encoded).strip()
def get_default_encodings() -> list[str]:
    """
    Return the encodings safe_str() and safe_bytes() try, in order, when the
    caller does not name any. A new list is built on every call.
    """
    default_encodings = ['utf8']
    return default_encodings
def safe_str(str_, to_encoding=None) -> str:
    """
    Turn ``str_`` into a text string without failing on undecodable bytes.

    A ``str`` is returned as is and any non-bytes object goes through
    ``str()``. Bytes are decoded with each candidate encoding in turn; when
    none of them decodes cleanly, the first candidate is used again with the
    'replace' error handler.

    :param str_: object to convert
    :param to_encoding: encoding name, or list/tuple of names, to decode
        with; defaults to get_default_encodings() when empty
    """
    if isinstance(str_, bytes):
        candidates = to_encoding or get_default_encodings()
        if not isinstance(candidates, (list, tuple)):
            candidates = [candidates]

        for codec in candidates:
            try:
                return str_.decode(codec)
            except UnicodeDecodeError:
                continue

        # nothing decoded cleanly: degrade gracefully with the first codec
        return str_.decode(candidates[0], 'replace')

    if isinstance(str_, str):
        return str_

    return str(str_)
def safe_bytes(str_, from_encoding=None) -> bytes:
    """
    Turn ``str_`` into bytes without failing on unencodable characters.

    Bytes are returned as is. A ``str`` is encoded with each candidate
    encoding in turn; when none of them can represent the text, the first
    candidate is used again with the 'replace' error handler.

    :param str_: string to encode (or bytes, returned unchanged)
    :param from_encoding: encoding name, or list/tuple of names, to encode
        with; defaults to get_default_encodings() when empty
    :raises ValueError: if ``str_`` is neither str nor bytes
    """
    if isinstance(str_, bytes):
        return str_

    if not isinstance(str_, str):
        raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

    from_encoding = from_encoding or get_default_encodings()
    if not isinstance(from_encoding, (list, tuple)):
        from_encoding = [from_encoding]

    for enc in from_encoding:
        try:
            return str_.encode(enc)
        except UnicodeError:
            # str.encode() raises UnicodeEncodeError; catching the common
            # base class lets the next candidate (and finally the 'replace'
            # fallback below) actually be tried.
            continue

    return str_.encode(from_encoding[0], 'replace')
def ascii_bytes(str_, allow_bytes=False) -> bytes:
    """
    Strictly encode a pure-ASCII ``str`` to bytes.

    Meant for text that is already known to be ASCII (hex, base64, json,
    urlencoded values, identifiers), where the guessing done by the "safe"
    helpers is unwanted. Non-ASCII characters raise UnicodeError.

    :param str_: text to encode
    :param allow_bytes: when true, a bytes value is returned unchanged
        instead of being rejected
    :raises ValueError: if ``str_`` is not a str (and not an allowed bytes)
    """
    if isinstance(str_, str):
        return str_.encode('ascii')

    if allow_bytes and isinstance(str_, bytes):
        return str_

    raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
def ascii_str(str_) -> str:
    """
    Strictly decode pure-ASCII bytes to ``str``.

    Meant for bytes that are already known to be ASCII (hex, base64,
    urlencoded values, identifiers), where the guessing done by the "safe"
    helpers is unwanted. Non-ASCII bytes raise UnicodeError.

    :param str_: bytes to decode
    :raises ValueError: if ``str_`` is not bytes
    """
    if isinstance(str_, bytes):
        return str_.decode('ascii')

    raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
r1196 | |||
def convert_to_str(data):
    """
    Recursively convert bytes found in ``data`` to str using safe_str().

    Tuples and lists are rebuilt (as plain ``tuple`` / ``list``) with every
    item converted; any other value, including dicts and sets, is returned
    unchanged.

    :param data: bytes, a tuple/list possibly nesting bytes, or anything else
    """
    if isinstance(data, bytes):
        return safe_str(data)

    if isinstance(data, tuple):
        return tuple(map(convert_to_str, data))

    if isinstance(data, list):
        return [convert_to_str(element) for element in data]

    return data
r1229 | |||
def splitnewlines(text: bytes):
    """
    Split ``text`` into lines on b'\\n' only, keeping the newline on each line.

    Unlike ``bytes.splitlines`` no other line boundary (e.g. b'\\r') is
    recognised. A trailing chunk without a newline is returned as the last
    item; empty input yields an empty list.
    """
    pieces = text.split(b'\n')
    # whatever follows the last newline; empty when text ends with one
    remainder = pieces.pop()

    result = [piece + b'\n' for piece in pieces]
    if remainder:
        result.append(remainder)
    return result