rhodecode-enterprise-ce Files · rhodecode/lib/str_utils.py

added latest changes

super-admin - - Load All Authors

File last commit:

r5647:8333bc7b default


                r5658:a109f5ac

default

Download file

             str_utils.py
        
                    184 lines
            
             | 5.2 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / str_utils.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # Copyright (C) 2011-2024 RhodeCode GmbH

      #

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU Affero General Public License, version 3

      # (only), as published by the Free Software Foundation.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU Affero General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      #

      # This program is dual-licensed. If you wish to learn more about the

      # RhodeCode Enterprise Edition, including its added features, Support services,

      # and proprietary license terms, please see https://rhodecode.com/licenses/

      import typing

      import base64

      import logging

      from unidecode import unidecode

      import rhodecode

      from rhodecode.lib.type_utils import aslist

      # Module-level logger named after this module.
      log = logging.getLogger(__name__)

      def safe_int(val, default=None) -> int:
          """
          Convert ``val`` with ``int()``, falling back to ``default`` on failure.

          :param val: value to convert
          :param default: value returned unchanged when ``val`` cannot be
              converted to an int
          """
          try:
              return int(val)
          except (ValueError, TypeError, OverflowError):
              # OverflowError is raised for infinite floats, e.g. int(float('inf'))
              return default

      def safe_float(val, default=None) -> float:
          """
          Convert ``val`` with ``float()``, falling back to ``default`` on failure.

          :param val: value to convert
          :param default: value returned unchanged when ``val`` cannot be
              converted to a float
          """
          try:
              return float(val)
          except (ValueError, TypeError, OverflowError):
              # OverflowError is raised for ints too large for a float, e.g. 10 ** 400
              return default

      def base64_to_str(text: str | bytes) -> str:
          """
          Base64-encode ``text`` and return the encoded value as a str.

          The input is first passed through ``safe_bytes``; leading and trailing
          whitespace is stripped from the result.
          """
          raw = safe_bytes(text)
          encoded = base64.encodebytes(raw)
          return safe_str(encoded).strip()

      def get_default_encodings() -> list[str]:
          """
          Read the ``default_encoding`` list from the RhodeCode configuration.

          ``'utf8'`` is passed as the ``missing`` value of the lookup.
          """
          config = rhodecode.ConfigGet()
          return config.get_list('default_encoding', missing='utf8')

      def safe_str(str_, to_encoding=None) -> str:
          """
          Return ``str_`` as a str, decoding bytes with the first encoding that fits.

          :param str_: object to convert; a str is returned unchanged and anything
              that is neither str nor bytes goes through ``str()``
          :param to_encoding: one encoding name, or a list/tuple of names tried in
              order; when not given, ``get_default_encodings()`` supplies them
          """
          if isinstance(str_, str):
              return str_
          if not isinstance(str_, bytes):
              return str(str_)

          encodings = to_encoding or get_default_encodings()
          if not isinstance(encodings, (list, tuple)):
              encodings = [encodings]

          for candidate in encodings:
              try:
                  decoded = str_.decode(candidate)
              except UnicodeDecodeError:
                  continue
              return decoded

          # no encoding decoded cleanly: use the first one in 'replace' mode
          return str_.decode(encodings[0], 'replace')

      def safe_bytes(str_, from_encoding=None) -> bytes:
          """
          Return ``str_`` as bytes, encoding a str with the first encoding that fits.

          :param str_: bytes (returned unchanged) or str to encode
          :param from_encoding: one encoding name, or a list/tuple of names tried in
              order; when not given, ``get_default_encodings()`` supplies them
          :raises ValueError: when ``str_`` is neither bytes nor str
          """
          if isinstance(str_, bytes):
              return str_
          if not isinstance(str_, str):
              raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

          encodings = from_encoding or get_default_encodings()
          if not isinstance(encodings, (list, tuple)):
              encodings = [encodings]

          for candidate in encodings:
              try:
                  encoded = str_.encode(candidate)
              except (UnicodeDecodeError, UnicodeEncodeError):
                  continue
              return encoded

          # no encoding worked cleanly: use the first one in 'replace' mode
          return str_.encode(encodings[0], 'replace')

      def ascii_bytes(str_, allow_bytes=False) -> bytes:
          """
          Encode a str that is assumed to be pure ASCII into bytes.

          Meant for places where encoding and "safe" ambiguity should be avoided:
          text that was already encoded some other way (hex, base64, json,
          urlencoding) or is known to be an identifier.

          :param str_: str to encode
          :param allow_bytes: when true, a bytes input is returned unchanged
          :raises ValueError: when ``str_`` is not a str (nor allowed bytes)
          :raises UnicodeError: when ``str_`` contains non-ASCII characters
          """
          if isinstance(str_, str):
              return str_.encode('ascii')
          if allow_bytes and isinstance(str_, bytes):
              return str_
          raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')

      def ascii_str(str_) -> str:
          """
          Decode bytes that are assumed to be pure ASCII into a str.

          Meant for places where encoding and "safe" ambiguity should be avoided:
          data known to be ASCII (hex, base64, urlencoding, identifiers) where a
          str is wanted without caring about encoding.

          :param str_: bytes to decode
          :raises ValueError: when ``str_`` is not bytes
          :raises UnicodeError: when ``str_`` contains non-ASCII bytes
          """
          if isinstance(str_, bytes):
              return str_.decode('ascii')
          raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')

      def convert_special_chars(str_) -> str:
          """
          Try to replace non-ASCII letters with their ASCII representation, eg::

              `żołw` converts into `zolw`
          """
          return unidecode(safe_str(str_))

      def splitnewlines(text: bytes):
          """
          Like splitlines, but split only on ``\\n`` and keep it on each line.

          A final segment without a trailing newline is kept as is; empty input
          gives an empty list.
          """
          segments = text.split(b'\n')
          # everything before the last separator was terminated by a newline
          tail = segments.pop()
          result = [segment + b'\n' for segment in segments]
          if tail:
              result.append(tail)
          return result

      def header_safe_str(val):
          """
          Convert ``val`` to bytes via ``safe_bytes`` and decode them as latin-1.
          """
          raw = safe_bytes(val)
          return raw.decode('latin-1', errors='replace')

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# Copyright (C) 2011-2024 RhodeCode GmbH
				#
				# This program is free software: you can redistribute it and/or modify
				# it under the terms of the GNU Affero General Public License, version 3
				# (only), as published by the Free Software Foundation.
				#
				# This program is distributed in the hope that it will be useful,
				# but WITHOUT ANY WARRANTY; without even the implied warranty of
				# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				# GNU General Public License for more details.
				#
				# You should have received a copy of the GNU Affero General Public License
				# along with this program. If not, see <http://www.gnu.org/licenses/>.
				#
				# This program is dual-licensed. If you wish to learn more about the
				# RhodeCode Enterprise Edition, including its added features, Support services,
				# and proprietary license terms, please see https://rhodecode.com/licenses/

				import typing
				import base64
				import logging
				from unidecode import unidecode

				import rhodecode
				from rhodecode.lib.type_utils import aslist


				# Module-level logger named after this module.
				log = logging.getLogger(__name__)


				def safe_int(val, default=None) -> int:
				    """
				    Convert ``val`` with ``int()``, falling back to ``default`` on failure.

				    :param val: value to convert
				    :param default: value returned unchanged when ``val`` cannot be
				        converted to an int
				    """
				    try:
				        return int(val)
				    except (ValueError, TypeError, OverflowError):
				        # OverflowError is raised for infinite floats, e.g. int(float('inf'))
				        return default


				def safe_float(val, default=None) -> float:
				    """
				    Convert ``val`` with ``float()``, falling back to ``default`` on failure.

				    :param val: value to convert
				    :param default: value returned unchanged when ``val`` cannot be
				        converted to a float
				    """
				    try:
				        return float(val)
				    except (ValueError, TypeError, OverflowError):
				        # OverflowError is raised for ints too large for a float, e.g. 10 ** 400
				        return default


				def base64_to_str(text: str | bytes) -> str:
				    """
				    Base64-encode ``text`` and return the encoded value as a str.

				    The input is first passed through ``safe_bytes``; leading and trailing
				    whitespace is stripped from the result.
				    """
				    return safe_str(base64.encodebytes(safe_bytes(text))).strip()


				def get_default_encodings() -> list[str]:
				    """
				    Read the ``default_encoding`` list from the RhodeCode configuration.

				    ``'utf8'`` is passed as the ``missing`` value of the lookup.
				    """
				    return rhodecode.ConfigGet().get_list('default_encoding', missing='utf8')


				def safe_str(str_, to_encoding=None) -> str:
				    """
				    Return ``str_`` as a str, decoding bytes with the first encoding that fits.

				    :param str_: object to convert; a str is returned unchanged and anything
				        that is neither str nor bytes goes through ``str()``
				    :param to_encoding: one encoding name, or a list/tuple of names tried in
				        order; when not given, ``get_default_encodings()`` supplies them
				    """
				    if isinstance(str_, str):
				        return str_

				    # anything that is not bytes is simply cast to str
				    if not isinstance(str_, bytes):
				        return str(str_)

				    to_encoding = to_encoding or get_default_encodings()
				    if not isinstance(to_encoding, (list, tuple)):
				        to_encoding = [to_encoding]

				    for enc in to_encoding:
				        try:
				            return str(str_, enc)
				        except UnicodeDecodeError:
				            pass

				    # no encoding decoded cleanly: use the first one in 'replace' mode
				    return str(str_, to_encoding[0], 'replace')


				def safe_bytes(str_, from_encoding=None) -> bytes:
				    """
				    Return ``str_`` as bytes, encoding a str with the first encoding that fits.

				    :param str_: bytes (returned unchanged) or str to encode
				    :param from_encoding: one encoding name, or a list/tuple of names tried in
				        order; when not given, ``get_default_encodings()`` supplies them
				    :raises ValueError: when ``str_`` is neither bytes nor str
				    """
				    if isinstance(str_, bytes):
				        return str_

				    if not isinstance(str_, str):
				        raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

				    from_encoding = from_encoding or get_default_encodings()
				    if not isinstance(from_encoding, (list, tuple)):
				        from_encoding = [from_encoding]

				    for enc in from_encoding:
				        try:
				            return str_.encode(enc)
				        except (UnicodeDecodeError, UnicodeEncodeError):
				            pass

				    # no encoding worked cleanly: use the first one in 'replace' mode
				    return str_.encode(from_encoding[0], 'replace')


				def ascii_bytes(str_, allow_bytes=False) -> bytes:
				    """
				    Encode a str that is assumed to be pure ASCII into bytes.

				    Meant for places where encoding and "safe" ambiguity should be avoided:
				    text that was already encoded some other way (hex, base64, json,
				    urlencoding) or is known to be an identifier.

				    :param str_: str to encode
				    :param allow_bytes: when true, a bytes input is returned unchanged
				    :raises ValueError: when ``str_`` is not a str (nor allowed bytes)
				    :raises UnicodeError: when ``str_`` contains non-ASCII characters
				    """
				    if allow_bytes and isinstance(str_, bytes):
				        return str_

				    if not isinstance(str_, str):
				        raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
				    return str_.encode('ascii')


				def ascii_str(str_) -> str:
				    """
				    Decode bytes that are assumed to be pure ASCII into a str.

				    Meant for places where encoding and "safe" ambiguity should be avoided:
				    data known to be ASCII (hex, base64, urlencoding, identifiers) where a
				    str is wanted without caring about encoding.

				    :param str_: bytes to decode
				    :raises ValueError: when ``str_`` is not bytes
				    :raises UnicodeError: when ``str_`` contains non-ASCII bytes
				    """
				    if not isinstance(str_, bytes):
				        raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
				    return str_.decode('ascii')


				def convert_special_chars(str_) -> str:
				    """
				    Try to replace non-ASCII letters with their ASCII representation, eg::

				        `żołw` converts into `zolw`
				    """
				    value = safe_str(str_)
				    converted_value = unidecode(value)
				    return converted_value


				def splitnewlines(text: bytes):
				    """
				    Like splitlines, but split only on ``\\n`` and keep it on each line.

				    A final segment without a trailing newline is kept as is; empty input
				    gives an empty list.
				    """
				    lines = [_l + b'\n' for _l in text.split(b'\n')]
				    if lines:
				        if lines[-1] == b'\n':
				            # input ended with a newline (or was empty): drop the empty tail
				            lines.pop()
				        else:
				            # last segment had no newline in the input: remove the one added above
				            lines[-1] = lines[-1][:-1]
				    return lines


				def header_safe_str(val):
				    """
				    Convert ``val`` to bytes via ``safe_bytes`` and decode them as latin-1.
				    """
				    return safe_bytes(val).decode('latin-1', errors='replace')