##// END OF EJS Templates
blame: use BinaryEnvelope wrapper to handle raw non-ascii content of files
blame: use BinaryEnvelope wrapper to handle raw non-ascii content of files

File last commit:

r1126:f96985cd python3
r1139:1b29ba78 default
Show More
str_utils.py
133 lines | 4.0 KiB | text/x-python | PythonLexer
packages: move the str utils to it's own module
r1060 # RhodeCode VCSServer provides access to different vcs backends via network.
source-code: updated copyrights to 2023
r1126 # Copyright (C) 2014-2023 RhodeCode GmbH
packages: move the str utils to it's own module
r1060 #
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
utils: added few typing fixes to str_utils
r1105 import typing
import base64
packages: move the str utils to it's own module
r1060 import logging
log = logging.getLogger(__name__)
def safe_int(val, default=None) -> int:
    """
    Convert ``val`` to ``int``, falling back to ``default`` on failure.

    :param val: value to pass to ``int()``
    :param default: value returned unchanged when ``val`` is not convertible
        (note that it is not coerced to ``int``, so the result is ``None``
        when the default is left unset)
    :return: ``int(val)`` on success, otherwise ``default``
    """
    try:
        return int(val)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers float infinities: int(float('inf')) raises it,
        # whereas int(float('nan')) raises ValueError. Both are
        # "not convertible" and must yield the default.
        return default
utils: added few typing fixes to str_utils
def base64_to_str(text) -> str:
    """
    Base64-encode ``text`` and return the encoded value as ``str``.

    Despite the name, this encodes *to* base64: the input is turned into
    bytes with ``safe_bytes``, encoded with ``base64.encodebytes`` and the
    result converted back to ``str`` with ``safe_str``.

    :param text: value to encode; passed through ``safe_bytes``
    :return: base64 text with leading/trailing whitespace stripped
    """
    raw = safe_bytes(text)
    encoded = base64.encodebytes(raw)
    # only surrounding whitespace is stripped here; anything encodebytes
    # puts inside the payload is kept as-is
    return safe_str(encoded).strip()
python3: fixes and code optimization for python3.11
def get_default_encodings() -> list[str]:
    """
    Return the list of encodings used when a caller does not specify one.

    :return: a new list on every call, currently containing only ``'utf8'``
    """
    encodings = ['utf8']
    return encodings
packages: move the str utils to it's own module
def safe_str(str_, to_encoding=None) -> str:
    """
    Turn ``str_`` into ``str`` without raising on undecodable bytes.

    ``str`` input is returned untouched, anything that is not ``bytes`` goes
    through ``str()``, and ``bytes`` are decoded with the first encoding
    that succeeds.

    :param str_: value to convert
    :param to_encoding: encoding name, or list/tuple of names, to try when
        decoding bytes; defaults to ``get_default_encodings()``
    :return: the decoded text; if every encoding fails, the first encoding
        is used again with the ``'replace'`` error handler
    """
    if isinstance(str_, str):
        return str_

    if not isinstance(str_, bytes):
        # arbitrary objects: rely on their own string representation
        return str(str_)

    encodings = to_encoding or get_default_encodings()
    if not isinstance(encodings, (list, tuple)):
        encodings = [encodings]

    for codec in encodings:
        try:
            return str_.decode(codec)
        except UnicodeDecodeError:
            continue

    # nothing decoded cleanly: substitute the undecodable bytes
    return str_.decode(encodings[0], 'replace')
def safe_bytes(str_, from_encoding=None) -> bytes:
    """
    Turn ``str_`` into ``bytes`` without raising on unencodable characters.

    ``bytes`` input is returned untouched; ``str`` input is encoded with the
    first encoding that succeeds.

    :param str_: ``str`` (or ``bytes``) value to convert
    :param from_encoding: encoding name, or list/tuple of names, to try when
        encoding; defaults to ``get_default_encodings()``
    :return: the encoded bytes; if every encoding fails, the first encoding
        is used again with the ``'replace'`` error handler
    :raises ValueError: if ``str_`` is neither ``str`` nor ``bytes``
    """
    if isinstance(str_, bytes):
        return str_

    if not isinstance(str_, str):
        raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

    from_encoding = from_encoding or get_default_encodings()
    if not isinstance(from_encoding, (list, tuple)):
        from_encoding = [from_encoding]

    for enc in from_encoding:
        try:
            return str_.encode(enc)
        except UnicodeError:
            # str.encode() signals failure with UnicodeEncodeError, not
            # UnicodeDecodeError; catching the common base class keeps the
            # fallback chain (next encoding, then 'replace') reachable.
            continue

    return str_.encode(from_encoding[0], 'replace')
def ascii_bytes(str_, allow_bytes=False) -> bytes:
    """
    Encode a pure-ASCII ``str`` to ``bytes``.

    Intended for values where "safe" guessing of encodings must be avoided:
    text that is already hex, base64, json or url-encoded, or that is known
    to be an identifier.

    :param str_: text to encode
    :param allow_bytes: when true, ``bytes`` input is returned unchanged
        instead of being rejected
    :return: ``str_`` encoded as ASCII
    :raises ValueError: if ``str_`` is not ``str`` (and not an allowed
        ``bytes`` value)
    :raises UnicodeError: if ``str_`` contains non-ASCII characters
    """
    if isinstance(str_, bytes) and allow_bytes:
        return str_

    if isinstance(str_, str):
        return str_.encode('ascii')

    raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
utils: added few typing fixes to str_utils
def ascii_str(str_) -> str:
    """
    Decode pure-ASCII ``bytes`` to ``str``.

    Intended for values where "safe" guessing of encodings must be avoided:
    data known to be ASCII such as hex, base64 or url-encoded values and
    identifiers, where a ``str`` is wanted without caring about encoding.

    :param str_: bytes to decode
    :return: ``str_`` decoded as ASCII
    :raises ValueError: if ``str_`` is not ``bytes``
    :raises UnicodeError: if ``str_`` contains non-ASCII bytes
    """
    if isinstance(str_, bytes):
        return str_.decode('ascii')

    raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')