##// END OF EJS Templates
py3: update safe_* functions for how unicode pretty much has been renamed to str...
Mads Kiilerich -
r8077:b1a3e6df default
parent child Browse files
Show More
@@ -70,21 +70,21 b' def safe_int(val, default=None):'
70 70
71 71 def safe_unicode(s):
72 72 """
73 Safe unicode function. Use a few tricks to turn s into unicode string:
73 Safe unicode str function. Use a few tricks to turn s into str:
74 74 In case of UnicodeDecodeError with configured default encodings, try to
75 75 detect encoding with chardet library, then fall back to first encoding with
76 76 errors replaced.
77 77 """
78 if isinstance(s, unicode):
78 if isinstance(s, str):
79 79 return s
80 80
81 if not isinstance(s, bytes): # use __str__ / __unicode__ and don't expect UnicodeDecodeError
82 return unicode(s)
81 if not isinstance(s, bytes): # use __str__ and don't expect UnicodeDecodeError
82 return str(s)
83 83
84 84 from kallithea.lib.vcs.conf import settings
85 85 for enc in settings.DEFAULT_ENCODINGS:
86 86 try:
87 return unicode(s, enc)
87 return str(s, enc)
88 88 except UnicodeDecodeError:
89 89 pass
90 90
@@ -96,7 +96,7 b' def safe_unicode(s):'
96 96 except (ImportError, UnicodeDecodeError):
97 97 pass
98 98
99 return unicode(s, settings.DEFAULT_ENCODINGS[0], 'replace')
99 return str(s, settings.DEFAULT_ENCODINGS[0], 'replace')
100 100
101 101
102 102 def safe_bytes(s):
@@ -108,7 +108,7 b' def safe_bytes(s):'
108 108 if isinstance(s, bytes):
109 109 return s
110 110
111 assert isinstance(s, unicode), repr(s) # bytes cannot coerce with __str__ or handle None or int
111 assert isinstance(s, str), repr(s) # bytes cannot coerce with __str__ or handle None or int
112 112
113 113 from kallithea.lib.vcs.conf import settings
114 114 for enc in settings.DEFAULT_ENCODINGS:
@@ -120,12 +120,12 b' def safe_bytes(s):'
120 120 return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')
121 121
122 122
123 safe_str = safe_bytes # safe_str is deprecated - it will be redefined when changing to py3
123 safe_str = safe_unicode
124 124
125 125
126 126 def ascii_bytes(s):
127 127 """
128 Simple conversion from unicode/str to bytes, *assuming* all codepoints are
128 Simple conversion from str to bytes, *assuming* all codepoints are
129 129 7-bit and it thus is pure ASCII.
130 130 Will fail badly with UnicodeError on invalid input.
131 131 This should be used where encoding and "safe" ambiguity should be avoided.
@@ -134,17 +134,17 b' def ascii_bytes(s):'
134 134 identifiers.
135 135
136 136 >>> ascii_bytes('a')
137 'a'
137 b'a'
138 138 >>> ascii_bytes(u'a')
139 'a'
139 b'a'
140 140 >>> ascii_bytes('å')
141 141 Traceback (most recent call last):
142 UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
143 >>> ascii_bytes(u'å')
142 UnicodeEncodeError: 'ascii' codec can't encode character '\xe5' in position 0: ordinal not in range(128)
143 >>> ascii_bytes('å'.encode('utf8'))
144 144 Traceback (most recent call last):
145 UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)
145 AssertionError: b'\xc3\xa5'
146 146 """
147 assert isinstance(s, (unicode, str)), repr(s)
147 assert isinstance(s, str), repr(s)
148 148 return s.encode('ascii')
149 149
150 150
@@ -158,23 +158,20 b' def ascii_str(s):'
158 158 where a unicode string is wanted without caring about encoding. For example
159 159 to hex, base64, urlencoding, or are known to be identifiers.
160 160
161 >>> ascii_str('a')
161 >>> ascii_str(b'a')
162 162 'a'
163 163 >>> ascii_str(u'a')
164 164 Traceback (most recent call last):
165 AssertionError: u'a'
166 >>> ascii_str('å')
165 AssertionError: 'a'
166 >>> ascii_str('å'.encode('utf8'))
167 167 Traceback (most recent call last):
168 168 UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
169 169 >>> ascii_str(u'å')
170 170 Traceback (most recent call last):
171 AssertionError: u'\xc3\xa5'
171 AssertionError: 'å'
172 172 """
173 173 assert isinstance(s, bytes), repr(s)
174 # Note: we use "encode", even though we really *should* use "decode". But
175 # we are in py2 and don't want py2, and encode is doing what we need for the
176 # ascii subset.
177 return s.encode('ascii')
174 return s.decode('ascii')
178 175
179 176
180 177 # Regex taken from http://www.regular-expressions.info/email.html
General Comments 0
You need to be logged in to leave comments. Login now