|
|
# RhodeCode VCSServer provides access to different vcs backends via network.
|
|
|
# Copyright (C) 2014-2023 RhodeCode GmbH
|
|
|
#
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
|
# (at your option) any later version.
|
|
|
#
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
# GNU General Public License for more details.
|
|
|
#
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
# along with this program; if not, write to the Free Software Foundation,
|
|
|
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
|
|
import typing
|
|
|
import base64
|
|
|
import logging
|
|
|
|
|
|
|
|
|
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
def safe_int(val, default=None) -> int:
    """
    Convert ``val`` with int(), falling back to ``default`` when the
    conversion raises ValueError or TypeError.

    Note that ``default`` is returned as-is (it is not converted), so the
    result is ``None`` when the conversion fails and no default was given.

    :param val: value to convert
    :param default: value returned when ``val`` cannot be converted
    """
    try:
        return int(val)
    except (ValueError, TypeError):
        return default
|
|
|
|
|
|
|
|
|
def base64_to_str(text) -> str:
    """
    Base64-encode ``text`` and return the encoded value as a str.

    Despite the name, this function encodes *to* base64; it does not decode.
    The input is first turned into bytes with safe_bytes(), encoded with
    base64.encodebytes(), converted back to str with safe_str(), and finally
    stripped of leading/trailing whitespace (which drops the trailing newline
    that encodebytes() appends).

    :param text: value to encode; anything accepted by safe_bytes()
    """
    raw = safe_bytes(text)
    encoded = base64.encodebytes(raw)
    return safe_str(encoded).strip()
|
|
|
|
|
|
|
|
|
def get_default_encodings() -> list[str]:
    """
    Return the encodings tried, in order, when no explicit encoding is given.

    A new list is built on every call, so callers may mutate the result.
    """
    default_encodings = ['utf8']
    return default_encodings
|
|
|
|
|
|
|
|
|
def safe_str(str_, to_encoding=None) -> str:
    """
    Best-effort conversion of ``str_`` to str.

    * str input is returned unchanged
    * anything that is neither str nor bytes is converted with str()
    * bytes are decoded with each candidate encoding in turn; if none of
      them succeeds, the first encoding is used again with the ``replace``
      error handler, so undecodable bytes become U+FFFD

    :param str_: value to convert
    :param to_encoding: encoding name, or list/tuple of encoding names, used
        to decode bytes; defaults to get_default_encodings()
    """
    if isinstance(str_, str):
        return str_

    # everything that is not bytes just gets its str() representation
    if not isinstance(str_, bytes):
        return str(str_)

    encodings = to_encoding or get_default_encodings()
    if not isinstance(encodings, (list, tuple)):
        encodings = [encodings]

    for codec in encodings:
        try:
            return str_.decode(codec)
        except UnicodeDecodeError:
            continue

    # no candidate decoded cleanly, fall back to a lossy decode
    return str_.decode(encodings[0], 'replace')
|
|
|
|
|
|
|
|
|
def safe_bytes(str_, from_encoding=None) -> bytes:
    """
    Best-effort conversion of ``str_`` to bytes.

    * bytes input is returned unchanged
    * str input is encoded with each candidate encoding in turn; if none of
      them can represent the text, the first encoding is used again with the
      ``replace`` error handler, so unencodable characters become ``?``

    :param str_: str (or bytes) to convert
    :param from_encoding: encoding name, or list/tuple of encoding names, used
        to encode the str; defaults to get_default_encodings()
    :raises ValueError: if ``str_`` is neither bytes nor str
    """
    if isinstance(str_, bytes):
        return str_

    if not isinstance(str_, str):
        raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

    from_encoding = from_encoding or get_default_encodings()
    if not isinstance(from_encoding, (list, tuple)):
        from_encoding = [from_encoding]

    for enc in from_encoding:
        try:
            return str_.encode(enc)
        except UnicodeEncodeError:
            # str.encode() reports failures as UnicodeEncodeError (not
            # UnicodeDecodeError); try the next candidate encoding
            pass

    # no candidate could encode the text, fall back to a lossy encode
    return str_.encode(from_encoding[0], 'replace')
|
|
|
|
|
|
|
|
|
def ascii_bytes(str_, allow_bytes=False) -> bytes:
    """
    Strict str -> bytes conversion for text that is known to be pure ASCII.

    Use this where the "safe" guessing of safe_bytes() is unwanted: values
    that are already encoded in some other way but are still str objects
    (hex, base64, json, urlencoding) or that are known to be identifiers.

    :param str_: str to convert
    :param allow_bytes: when true, bytes input is passed through unchanged
        instead of being rejected
    :raises ValueError: if ``str_`` is not a str (and not an allowed bytes)
    :raises UnicodeError: if ``str_`` contains non-ASCII characters
    """
    if allow_bytes and isinstance(str_, bytes):
        return str_

    if isinstance(str_, str):
        return str_.encode('ascii')

    raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
|
|
|
|
|
|
|
|
|
def ascii_str(str_) -> str:
    """
    Strict bytes -> str conversion for data that is known to be pure ASCII.

    Use this where the "safe" guessing of safe_str() is unwanted: values that
    are known to be ASCII for other reasons (hex, base64, urlencoding,
    identifiers) and where a str is wanted without caring about encodings.

    :param str_: bytes to convert
    :raises ValueError: if ``str_`` is not bytes
    :raises UnicodeError: if ``str_`` contains non-ASCII bytes
    """
    if isinstance(str_, bytes):
        return str_.decode('ascii')

    raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
|
|
|
|
|
|
|
|
|
def convert_to_str(data):
    """
    Recursively convert bytes found in ``data`` to str using safe_str().

    Only bytes, tuples and lists are handled: bytes are converted, tuples and
    lists are rebuilt (as a plain tuple / list) with every item converted.
    Any other value, including dicts and their contents, is returned as-is.

    :param data: value to convert
    """
    if isinstance(data, bytes):
        return safe_str(data)

    if isinstance(data, tuple):
        return tuple(map(convert_to_str, data))

    if isinstance(data, list):
        return [convert_to_str(element) for element in data]

    return data
|
|
|
|
|
|
|
|
|
def splitnewlines(text: bytes):
    """
    Split ``text`` into lines on ``\\n`` only, keeping the line endings.

    Unlike bytes.splitlines(), other line boundaries such as ``\\r`` are left
    untouched. Every returned line ends with ``\\n`` except possibly the last
    one, which is only present when the input does not end with a newline.

    :param text: bytes to split
    """
    segments = text.split(b'\n')
    # whatever follows the final newline (empty if text ends with one)
    remainder = segments.pop()

    result = [segment + b'\n' for segment in segments]
    if remainder:
        result.append(remainder)
    return result
|
|
|
|