upstream/kallithea Commit - r8077:b1a3e6df

70

71

def safe_unicode(s):

71

def safe_unicode(s):

72

"""

72

"""

73

Safe unicode function. Use a few tricks to turn s into ~~unicode~~ str~~ing~~:

73

Safe unicode str function. Use a few tricks to turn s into str:

74

In case of UnicodeDecodeError with configured default encodings, try to

74

In case of UnicodeDecodeError with configured default encodings, try to

75

detect encoding with chardet library, then fall back to first encoding with

75

detect encoding with chardet library, then fall back to first encoding with

76

errors replaced.

76

errors replaced.

77

"""

77

"""

78

if isinstance(s, ~~unicode~~):

78

if isinstance(s, str):

79

return s

79

return s

80

81

if not isinstance(s, bytes): # use __str__ ~~/ __unicode__~~ and don't expect UnicodeDecodeError

81

if not isinstance(s, bytes): # use __str__ and don't expect UnicodeDecodeError

82

return ~~unicode~~(s)

82

return str(s)

83

84

from kallithea.lib.vcs.conf import settings

84

from kallithea.lib.vcs.conf import settings

85

for enc in settings.DEFAULT_ENCODINGS:

85

for enc in settings.DEFAULT_ENCODINGS:

86

try:

86

try:

87

return ~~unicode~~(s, enc)

87

return str(s, enc)

88

except UnicodeDecodeError:

88

except UnicodeDecodeError:

89

pass

89

pass

90

96

except (ImportError, UnicodeDecodeError):

96

except (ImportError, UnicodeDecodeError):

97

pass

97

pass

98

99

return ~~unicode~~(s, settings.DEFAULT_ENCODINGS[0], 'replace')

99

return str(s, settings.DEFAULT_ENCODINGS[0], 'replace')

100

101

102

def safe_bytes(s):

102

def safe_bytes(s):

108

if isinstance(s, bytes):

108

if isinstance(s, bytes):

109

return s

109

return s

110

111

assert isinstance(s, ~~unicode~~), repr(s) # bytes cannot coerce with __str__ or handle None or int

111

assert isinstance(s, str), repr(s) # bytes cannot coerce with __str__ or handle None or int

112

113

from kallithea.lib.vcs.conf import settings

113

from kallithea.lib.vcs.conf import settings

114

for enc in settings.DEFAULT_ENCODINGS:

114

for enc in settings.DEFAULT_ENCODINGS:

120

return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')

120

return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')

121

122

123

safe_str = safe_bytes # safe_str is deprecated - it will be redefined when changing to py3

123

safe_str = safe_unicode

124

125

126

def ascii_bytes(s):

126

def ascii_bytes(s):

127

"""

127

"""

128

Simple conversion from ~~unicode/~~str to bytes, *assuming* all codepoints are

128

Simple conversion from str to bytes, *assuming* all codepoints are

129

7-bit and it thus is pure ASCII.

129

7-bit and it thus is pure ASCII.

130

Will fail badly with UnicodeError on invalid input.

130

Will fail badly with UnicodeError on invalid input.

131

This should be used where encoding and "safe" ambiguity should be avoided.

131

This should be used where encoding and "safe" ambiguity should be avoided.

134

identifiers.

134

identifiers.

135

136

>>> ascii_bytes('a')

136

>>> ascii_bytes('a')

137

'a'

137

b'a'

138

>>> ascii_bytes(u'a')

138

>>> ascii_bytes(u'a')

139

'a'

139

b'a'

140

>>> ascii_bytes('å')

140

>>> ascii_bytes('å')

141

Traceback (most recent call last):

141

Traceback (most recent call last):

142

UnicodeDecodeError: 'ascii' codec can't ~~decode byte 0xc3~~ in position 0: ordinal not in range(128)

142

UnicodeEncodeError: 'ascii' codec can't encode character '\xe5' in position 0: ordinal not in range(128)

143

>>> ascii_bytes(u'å')

143

>>> ascii_bytes('å'.encode('utf8'))

144

Traceback (most recent call last):

144

Traceback (most recent call last):

145

UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)

145

AssertionError: b'\xc3\xa5'

146

"""

146

"""

147

assert isinstance(s, (~~unicode~~, str)), repr(s)

147

assert isinstance(s, str), repr(s)

148

return s.encode('ascii')

148

return s.encode('ascii')

149

150

158

where a unicode string is wanted without caring about encoding. For example

158

where a unicode string is wanted without caring about encoding. For example

159

to hex, base64, urlencoding, or are known to be identifiers.

159

to hex, base64, urlencoding, or are known to be identifiers.

160

161

>>> ascii_str('a')

161

>>> ascii_str(b'a')

162

'a'

162

'a'

163

>>> ascii_str(u'a')

163

>>> ascii_str(u'a')

164

Traceback (most recent call last):

164

Traceback (most recent call last):

165

AssertionError: u'a'

165

AssertionError: 'a'

166

>>> ascii_str('å')

166

>>> ascii_str('å'.encode('utf8'))

167

Traceback (most recent call last):

167

Traceback (most recent call last):

168

UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)

168

UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)

169

>>> ascii_str(u'å')

169

>>> ascii_str(u'å')

170

Traceback (most recent call last):

170

Traceback (most recent call last):

171

AssertionError: ~~u'\xc3\xa5~~'

171

AssertionError: 'å'

172

"""

172

"""

173

assert isinstance(s, bytes), repr(s)

173

assert isinstance(s, bytes), repr(s)

174

# Note: we use "encode", even though we really *should* use "decode". But

174

return s.decode('ascii')

175

# we are in py2 and don't want py2, and encode is doing what we need for the

176

# ascii subset.

177

return s.encode('ascii')

178

175

179

176

180

# Regex taken from http://www.regular-expressions.info/email.html

177

# Regex taken from http://www.regular-expressions.info/email.html

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             def safe_unicode(s):
                 """
-                Safe unicode function. Use a few tricks to turn s into unicode string:
+                Safe unicode str function. Use a few tricks to turn s into str:
                 In case of UnicodeDecodeError with configured default encodings, try to
                 detect encoding with chardet library, then fall back to first encoding with
                 errors replaced.
                 """
-                if isinstance(s, unicode):
+                if isinstance(s, str):
                     return s
-                if not isinstance(s, bytes):  # use __str__ / __unicode__ and don't expect UnicodeDecodeError
+                if not isinstance(s, bytes):  # use __str__ and don't expect UnicodeDecodeError
-                    return unicode(s)
+                    return str(s)
                 from kallithea.lib.vcs.conf import settings
                 for enc in settings.DEFAULT_ENCODINGS:
                     try:
-                        return unicode(s, enc)
+                        return str(s, enc)
                     except UnicodeDecodeError:
                         pass
                 except (ImportError, UnicodeDecodeError):
                     pass
-                return unicode(s, settings.DEFAULT_ENCODINGS[0], 'replace')
+                return str(s, settings.DEFAULT_ENCODINGS[0], 'replace')
             def safe_bytes(s):
                 if isinstance(s, bytes):
                     return s
-                assert isinstance(s, unicode), repr(s)  # bytes cannot coerce with __str__ or handle None or int
+                assert isinstance(s, str), repr(s)  # bytes cannot coerce with __str__ or handle None or int
                 from kallithea.lib.vcs.conf import settings
                 for enc in settings.DEFAULT_ENCODINGS:
                 return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')
-            safe_str = safe_bytes  # safe_str is deprecated - it will be redefined when changing to py3
+            safe_str = safe_unicode
             def ascii_bytes(s):
                 """
-                Simple conversion from unicode/str to bytes, *assuming* all codepoints are
+                Simple conversion from str to bytes, *assuming* all codepoints are
                 7-bit and it thus is pure ASCII.
                 Will fail badly with UnicodeError on invalid input.
                 This should be used where encoding and "safe" ambiguity should be avoided.
                 identifiers.
                 >>> ascii_bytes('a')
-                'a'
+                b'a'
                 >>> ascii_bytes(u'a')
-                'a'
+                b'a'
                 >>> ascii_bytes('å')
                 Traceback (most recent call last):
-                UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
+                UnicodeEncodeError: 'ascii' codec can't encode character '\xe5' in position 0: ordinal not in range(128)
-                >>> ascii_bytes(u'å')
+                >>> ascii_bytes('å'.encode('utf8'))
                 Traceback (most recent call last):
-                UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)
+                AssertionError: b'\xc3\xa5'
                 """
-                assert isinstance(s, (unicode, str)), repr(s)
+                assert isinstance(s, str), repr(s)
                 return s.encode('ascii')
                 where a unicode string is wanted without caring about encoding. For example
                 to hex, base64, urlencoding, or are known to be identifiers.
-                >>> ascii_str('a')
+                >>> ascii_str(b'a')
                 'a'
                 >>> ascii_str(u'a')
                 Traceback (most recent call last):
-                AssertionError: u'a'
+                AssertionError: 'a'
-                >>> ascii_str('å')
+                >>> ascii_str('å'.encode('utf8'))
                 Traceback (most recent call last):
                 UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
                 >>> ascii_str(u'å')
                 Traceback (most recent call last):
-                AssertionError: u'\xc3\xa5'
+                AssertionError: 'å'
                 """
                 assert isinstance(s, bytes), repr(s)
-                # Note: we use "encode", even though we really *should* use "decode". But
+                return s.decode('ascii')
-                # we are in py2 and don't want py2, and encode is doing what we need for the
-                # ascii subset.
-                return s.encode('ascii')
             # Regex taken from http://www.regular-expressions.info/email.html