upstream/kallithea Commit - r8077:b1a3e6df

py3: update safe_* functions for how unicode pretty much has been renamed to str...

Mads Kiilerich -

r8077:b1a3e6df default

parent child

kallithea/lib/vcs/utils/__init__.py

0 +20 -23

              def safe_unicode(s):
                  """
-                 Safe unicode function. Use a few tricks to turn s into unicode string:
+                 Safe unicode str function. Use a few tricks to turn s into str:
                  In case of UnicodeDecodeError with configured default encodings, try to
                  detect encoding with chardet library, then fall back to first encoding with
                  errors replaced.
                  """
-                 if isinstance(s, unicode):
+                 if isinstance(s, str):
                      return s
-                 if not isinstance(s, bytes):  # use __str__ / __unicode__ and don't expect UnicodeDecodeError
-                     return unicode(s)
+                 if not isinstance(s, bytes):  # use __str__ and don't expect UnicodeDecodeError
+                     return str(s)
                  from kallithea.lib.vcs.conf import settings
                  for enc in settings.DEFAULT_ENCODINGS:
                      try:
-                         return unicode(s, enc)
+                         return str(s, enc)
                      except UnicodeDecodeError:
                          pass
                  except (ImportError, UnicodeDecodeError):
                      pass
-                 return unicode(s, settings.DEFAULT_ENCODINGS[0], 'replace')
+                 return str(s, settings.DEFAULT_ENCODINGS[0], 'replace')
              def safe_bytes(s):
                  if isinstance(s, bytes):
                      return s
-                 assert isinstance(s, unicode), repr(s)  # bytes cannot coerce with __str__ or handle None or int
+                 assert isinstance(s, str), repr(s)  # bytes cannot coerce with __str__ or handle None or int
                  from kallithea.lib.vcs.conf import settings
                  for enc in settings.DEFAULT_ENCODINGS:
                  return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')
-             safe_str = safe_bytes  # safe_str is deprecated - it will be redefined when changing to py3
+             safe_str = safe_unicode
              def ascii_bytes(s):
                  """
-                 Simple conversion from unicode/str to bytes, *assuming* all codepoints are
+                 Simple conversion from str to bytes, *assuming* all codepoints are
                  7-bit and it thus is pure ASCII.
                  Will fail badly with UnicodeError on invalid input.
                  This should be used where encoding and "safe" ambiguity should be avoided.
                  identifiers.
                  >>> ascii_bytes('a')
-                 'a'
+                 b'a'
                  >>> ascii_bytes(u'a')
-                 'a'
+                 b'a'
                  >>> ascii_bytes('å')
                  Traceback (most recent call last):
-                 UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
-                 >>> ascii_bytes(u'å')
+                 UnicodeEncodeError: 'ascii' codec can't encode character '\xe5' in position 0: ordinal not in range(128)
+                 >>> ascii_bytes('å'.encode('utf8'))
                  Traceback (most recent call last):
-                 UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)
+                 AssertionError: b'\xc3\xa5'
                  """
-                 assert isinstance(s, (unicode, str)), repr(s)
+                 assert isinstance(s, str), repr(s)
                  return s.encode('ascii')
                  where a unicode string is wanted without caring about encoding. For example
                  to hex, base64, urlencoding, or are known to be identifiers.
-                 >>> ascii_str('a')
+                 >>> ascii_str(b'a')
                  'a'
                  >>> ascii_str(u'a')
                  Traceback (most recent call last):
-                 AssertionError: u'a'
-                 >>> ascii_str('å')
+                 AssertionError: 'a'
+                 >>> ascii_str('å'.encode('utf8'))
                  Traceback (most recent call last):
                  UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
                  >>> ascii_str(u'å')
                  Traceback (most recent call last):
-                 AssertionError: u'\xc3\xa5'
+                 AssertionError: 'å'
                  """
                  assert isinstance(s, bytes), repr(s)
-                 # Note: we use "encode", even though we really *should* use "decode". But
-                 # we are in py2 and don't want py2, and encode is doing what we need for the
-                 # ascii subset.
-                 return s.encode('ascii')
+                 return s.decode('ascii')
              # Regex taken from http://www.regular-expressions.info/email.html

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages