rhodecode-enterprise-ce Files · rhodecode/lib/str_utils.py

deps: bumped pycryptodome==3.21.0 for security issue

super-admin - - Load All Authors

File last commit:

r5608:6d33e504 default


                r5640:acc4336c

default

Download file

             str_utils.py
        
                    187 lines
            
             | 5.2 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / str_utils.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        super-admin
    
core: updated copyright to 2024

              r5608
            
      # Copyright (C) 2011-2024 RhodeCode GmbH

        super-admin
    
core: break down some utils for better imports

              r4915
            
      #

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU Affero General Public License, version 3

      # (only), as published by the Free Software Foundation.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU Affero General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      #

      # This program is dual-licensed. If you wish to learn more about the

      # RhodeCode Enterprise Edition, including its added features, Support services,

      # and proprietary license terms, please see https://rhodecode.com/licenses/

        super-admin
    
core: multiple fixes to unicode vs str usage...

              r5065
            
      import typing

      import base64

        super-admin
    
core: break down some utils for better imports

              r4915
            
      import logging

        super-admin
    
core: multiple fixes to unicode vs str usage...

              r5065
            
      from unidecode import unidecode

        super-admin
    
core: break down some utils for better imports

              r4915
            
      import rhodecode

      from rhodecode.lib.type_utils import aslist

        super-admin
    
core: multiple fixes to unicode vs str usage...

              r5065
            
        super-admin
    
core: break down some utils for better imports

              r4915
            
      # Module-level logger, named after this module.
      log = logging.getLogger(__name__)

      def safe_int(val, default=None) -> int:
          """
          Convert ``val`` with ``int()``, falling back to ``default`` on failure.

          :param val: value handed to ``int()``
          :param default: returned unchanged when ``val`` is not convertible
              to int (``None`` unless overridden)
          :return: ``int(val)``, or ``default`` when the conversion raises
              ``ValueError``, ``TypeError`` or ``OverflowError``
          """
          try:
              return int(val)
          except (ValueError, TypeError, OverflowError):
              # OverflowError: int() of an infinite float, e.g. float('inf')
              return default

        super-admin
    
core: multiple fixes to unicode vs str usage...

              r5065
            
      def safe_float(val, default=None) -> float:
          """
          Convert ``val`` with ``float()``, falling back to ``default`` on failure.

          :param val: value handed to ``float()``
          :param default: returned unchanged when ``val`` is not convertible
              to float (``None`` unless overridden)
          :return: ``float(val)``, or ``default`` when the conversion raises
              ``ValueError``, ``TypeError`` or ``OverflowError``
          """
          try:
              return float(val)
          except (ValueError, TypeError, OverflowError):
              # OverflowError: an int too large for a float, e.g. 10 ** 400
              return default

        super-admin
    
cleanups: typing + whitespaces

              r5101
            
      def base64_to_str(text: str | bytes) -> str:
          """
          Base64-encode ``text`` and return the encoded form as a string.

          The input goes through ``safe_bytes`` first, the encoded bytes are
          turned into a string with ``safe_str``, and surrounding whitespace
          is stripped from the result.
          """
          raw = safe_bytes(text)
          encoded = base64.encodebytes(raw)
          return safe_str(encoded).strip()

        super-admin
    
modernize: python3 updates

              r5096
            
      def get_default_encodings() -> list[str]:
          """
          Return the ``default_encoding`` setting from ``rhodecode.CONFIG``,
          split on commas via ``aslist``; ``'utf8'`` is used when the setting
          is missing.
          """
          configured = rhodecode.CONFIG.get('default_encoding', 'utf8')
          return aslist(configured, sep=',')

        super-admin
    
core: multiple fixes to unicode vs str usage...

              r5065
            
      # Encoding list resolved once, at the time this module is imported.
      DEFAULT_ENCODINGS = get_default_encodings()

        super-admin
    
core: break down some utils for better imports

              r4915
            
      def safe_str(str_, to_encoding=None) -> str:
          """
          Return ``str_`` as a text string, decoding bytes when needed.

          :param str_: value to convert; ``str`` is returned as-is, anything
              that is not ``bytes`` goes through ``str()``
          :param to_encoding: encoding name, or list/tuple of names, tried in
              order when decoding bytes; ``DEFAULT_ENCODINGS`` when not given
          :return: the decoded text; when every encoding fails, the first one
              is used again with ``'replace'`` error handling
          """
          if isinstance(str_, str):
              return str_

          if not isinstance(str_, bytes):
              # neither text nor bytes: plain str() conversion
              return str(str_)

          candidates = to_encoding or DEFAULT_ENCODINGS
          if not isinstance(candidates, (list, tuple)):
              candidates = [candidates]

          for codec in candidates:
              try:
                  decoded = str(str_, codec)
              except UnicodeDecodeError:
                  continue
              else:
                  return decoded

          # nothing decoded cleanly, fall back to a lossy decode
          return str(str_, candidates[0], 'replace')

      def safe_bytes(str_, from_encoding=None) -> bytes:
          """
          Encode ``str_`` into bytes, trying each candidate encoding in order.

          :param str_: ``bytes`` (returned unchanged) or ``str`` to encode
          :param from_encoding: encoding name, or list/tuple of names, to try;
              when not given, the result of ``get_default_encodings()`` is used
          :return: the encoded bytes; when every candidate fails, the first
              encoding is applied again with ``'replace'`` error handling
          :raises ValueError: if ``str_`` is neither ``bytes`` nor ``str``
          """
          if isinstance(str_, bytes):
              return str_

          if not isinstance(str_, str):
              raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')

          from_encoding = from_encoding or get_default_encodings()
          if not isinstance(from_encoding, (list, tuple)):
              from_encoding = [from_encoding]

          for enc in from_encoding:
              try:
                  return str_.encode(enc)
              except UnicodeEncodeError:
                  # str.encode() reports unencodable characters with
                  # UnicodeEncodeError; move on to the next candidate
                  pass

          return str_.encode(from_encoding[0], 'replace')

      def ascii_bytes(str_, allow_bytes=False) -> bytes:
          """
          Encode a pure-ASCII ``str`` into ``bytes``.

          Meant for places where encoding and "safe" ambiguity should be
          avoided: text that was already encoded some other way (hex, base64,
          json, urlencoding) or is known to be an identifier.

          :param str_: string to encode
          :param allow_bytes: when true, a ``bytes`` input is returned unchanged
          :raises ValueError: if ``str_`` is not a ``str`` (and not an allowed
              ``bytes`` value)
          :raises UnicodeError: if ``str_`` contains non-ASCII characters
          """
          if isinstance(str_, str):
              return str_.encode('ascii')

          if allow_bytes and isinstance(str_, bytes):
              return str_

          raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')

        super-admin
    
core: multiple fixes to unicode vs str usage...

              r5065
            
      def ascii_str(str_) -> str:
          """
          Decode pure-ASCII ``bytes`` into ``str``.

          Meant for places where encoding and "safe" ambiguity should be
          avoided: data known to be ASCII (hex, base64, urlencoding,
          identifiers) where a string is wanted without caring about encoding.

          :param str_: bytes to decode
          :raises ValueError: if ``str_`` is not ``bytes``
          :raises UnicodeError: if ``str_`` contains non-ASCII bytes
          """
          if isinstance(str_, bytes):
              return str_.decode('ascii')

          raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')

        super-admin
    
str_utils: added common non-ascii replacer

              r4989
            
        super-admin
    
core: multiple fixes to unicode vs str usage...

              r5065
            
      def convert_special_chars(str_) -> str:
          """
          Try to replace non-ASCII letters with their ASCII representation, e.g.::

              `żołw` converts into `zolw`

          The input is first converted with ``safe_str`` and then passed to
          ``unidecode``.
          """
          return unidecode(safe_str(str_))

        super-admin
    
chore(sync-up): synced libs/settings maker from vcsserver

              r5337
            
      def splitnewlines(text: bytes):
          """
          Split ``text`` into lines on ``b'\\n'`` only, keeping the newline
          at the end of each line (unlike ``splitlines``, ``b'\\r'`` is not a
          separator).

          A final piece without a trailing newline is returned as-is; empty
          input gives an empty list.
          """
          pieces = text.split(b'\n')
          # whatever follows the last newline (empty when text ends with one)
          tail = pieces.pop()

          result = [piece + b'\n' for piece in pieces]
          if tail:
              result.append(tail)
          return result

        super-admin
    
feat(artifacts): new artifact storage engines allowing an s3 based uploads

              r5516
            
      def header_safe_str(val):
          """
          Convert ``val`` to bytes with ``safe_bytes`` and decode those bytes
          as latin-1 (with ``'replace'`` error handling).
          """
          encoded = safe_bytes(val)
          return encoded.decode('latin-1', errors='replace')

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

super-admin core: updated copyright to 2024	r5608	# Copyright (C) 2011-2024 RhodeCode GmbH
super-admin core: break down some utils for better imports	r4915	#
		# This program is free software: you can redistribute it and/or modify
		# it under the terms of the GNU Affero General Public License, version 3
		# (only), as published by the Free Software Foundation.
		#
		# This program is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		# GNU General Public License for more details.
		#
		# You should have received a copy of the GNU Affero General Public License
		# along with this program. If not, see <http://www.gnu.org/licenses/>.
		#
		# This program is dual-licensed. If you wish to learn more about the
		# RhodeCode Enterprise Edition, including its added features, Support services,
		# and proprietary license terms, please see https://rhodecode.com/licenses/

super-admin core: multiple fixes to unicode vs str usage...	r5065	import typing
		import base64
super-admin core: break down some utils for better imports	r4915	import logging
super-admin core: multiple fixes to unicode vs str usage...	r5065	from unidecode import unidecode

super-admin core: break down some utils for better imports	r4915	import rhodecode
		from rhodecode.lib.type_utils import aslist

super-admin core: multiple fixes to unicode vs str usage...	r5065
super-admin core: break down some utils for better imports	r4915	log = logging.getLogger(__name__)


		def safe_int(val, default=None) -> int:
		"""
		Returns int() of val if val is not convertable to int use default
		instead

		:param val:
		:param default:
		"""

		try:
		val = int(val)
		except (ValueError, TypeError):
		val = default

		return val


super-admin core: multiple fixes to unicode vs str usage...	r5065	def safe_float(val, default=None) -> float:
		"""
		Returns float() of val if val is not convertable to float use default
		instead

		:param val:
		:param default:
		"""

		try:
		val = float(val)
		except (ValueError, TypeError):
		val = default

		return val


super-admin cleanups: typing + whitespaces	r5101	def base64_to_str(text: str \| bytes) -> str:
super-admin core: multiple fixes to unicode vs str usage...	r5065	return safe_str(base64.encodebytes(safe_bytes(text))).strip()


super-admin modernize: python3 updates	r5096	def get_default_encodings() -> list[str]:
super-admin core: break down some utils for better imports	r4915	return aslist(rhodecode.CONFIG.get('default_encoding', 'utf8'), sep=',')


super-admin core: multiple fixes to unicode vs str usage...	r5065	DEFAULT_ENCODINGS = get_default_encodings()


super-admin core: break down some utils for better imports	r4915	def safe_str(str_, to_encoding=None) -> str:
		"""
		safe str function. Does few trick to turn unicode_ into string

		:param str_: str to encode
		:param to_encoding: encode to this type UTF8 default
		"""
		if isinstance(str_, str):
		return str_

		# if it's bytes cast to str
		if not isinstance(str_, bytes):
		return str(str_)

super-admin core: multiple fixes to unicode vs str usage...	r5065	to_encoding = to_encoding or DEFAULT_ENCODINGS
super-admin core: break down some utils for better imports	r4915	if not isinstance(to_encoding, (list, tuple)):
		to_encoding = [to_encoding]

		for enc in to_encoding:
		try:
		return str(str_, enc)
		except UnicodeDecodeError:
		pass

		return str(str_, to_encoding[0], 'replace')


		def safe_bytes(str_, from_encoding=None) -> bytes:
		"""
		safe bytes function. Does few trick to turn str_ into bytes string:

		:param str_: string to decode
		:param from_encoding: encode from this type UTF8 default
		"""
		if isinstance(str_, bytes):
		return str_

		if not isinstance(str_, str):
super-admin core: multiple fixes to unicode vs str usage...	r5065	raise ValueError(f'safe_bytes cannot convert other types than str: got: {type(str_)}')
super-admin core: break down some utils for better imports	r4915
		from_encoding = from_encoding or get_default_encodings()
		if not isinstance(from_encoding, (list, tuple)):
		from_encoding = [from_encoding]

		for enc in from_encoding:
		try:
		return str_.encode(enc)
		except UnicodeDecodeError:
		pass

		return str_.encode(from_encoding[0], 'replace')


		def ascii_bytes(str_, allow_bytes=False) -> bytes:
		"""
		Simple conversion from str to bytes, with assumption that str_ is pure ASCII.
		Fails with UnicodeError on invalid input.
		This should be used where encoding and "safe" ambiguity should be avoided.
		Where strings already have been encoded in other ways but still are unicode
		string - for example to hex, base64, json, urlencoding, or are known to be
		identifiers.
		"""
		if allow_bytes and isinstance(str_, bytes):
		return str_

		if not isinstance(str_, str):
super-admin core: multiple fixes to unicode vs str usage...	r5065	raise ValueError(f'ascii_bytes cannot convert other types than str: got: {type(str_)}')
super-admin core: break down some utils for better imports	r4915	return str_.encode('ascii')


super-admin core: multiple fixes to unicode vs str usage...	r5065	def ascii_str(str_) -> str:
super-admin core: break down some utils for better imports	r4915	"""
		Simple conversion from bytes to str, with assumption that str_ is pure ASCII.
		Fails with UnicodeError on invalid input.
		This should be used where encoding and "safe" ambiguity should be avoided.
		Where strings are encoded but also in other ways are known to be ASCII, and
		where a unicode string is wanted without caring about encoding. For example
		to hex, base64, urlencoding, or are known to be identifiers.
		"""

		if not isinstance(str_, bytes):
super-admin core: multiple fixes to unicode vs str usage...	r5065	raise ValueError(f'ascii_str cannot convert other types than bytes: got: {type(str_)}')
super-admin core: break down some utils for better imports	r4915	return str_.decode('ascii')
super-admin str_utils: added common non-ascii replacer	r4989

super-admin core: multiple fixes to unicode vs str usage...	r5065	def convert_special_chars(str_) -> str:
super-admin str_utils: added common non-ascii replacer	r4989	"""
		trie to replace non-ascii letters to their ascii representation eg::

		`żołw` converts into `zolw`
		"""
		value = safe_str(str_)
super-admin core: multiple fixes to unicode vs str usage...	r5065	converted_value = unidecode(value)
super-admin str_utils: added common non-ascii replacer	r4989	return converted_value
super-admin chore(sync-up): synced libs/settings maker from vcsserver	r5337

		def splitnewlines(text: bytes):
		"""
		like splitlines, but only split on newlines.
		"""

		lines = [_l + b'\n' for _l in text.split(b'\n')]
		if lines:
		if lines[-1] == b'\n':
		lines.pop()
		else:
		lines[-1] = lines[-1][:-1]
		return lines
super-admin feat(artifacts): new artifact storage engines allowing an s3 based uploads	r5516

		def header_safe_str(val):
		return safe_bytes(val).decode('latin-1', errors='replace')