##// END OF EJS Templates
extended safe_str and safe_unicode with chardet fallback....
marcink -
r1490:76b358f8 beta
parent child Browse files
Show More
@@ -157,44 +157,66 b' def generate_api_key(username, salt=None'
157 157 return hashlib.sha1(username + salt).hexdigest()
158 158
159 159
def safe_unicode(str_, from_encoding='utf8'):
    """
    safe unicode function. Turns ``str_`` into unicode without raising
    decode errors.

    Decoding is attempted in three stages:

    1. strict decoding using ``from_encoding``
    2. strict decoding using the encoding guessed by the ``chardet``
       library (skipped when chardet is not installed, cannot guess, or
       guesses an encoding that fails)
    3. decoding using ``from_encoding`` with undecodable bytes replaced

    :param str_: string to decode, unicode objects are returned unchanged
    :param from_encoding: encoding tried first and used for the final
        fallback with errors replaced
    :rtype: unicode
    :returns: unicode object
    """

    if isinstance(str_, unicode):
        return str_

    try:
        return unicode(str_, from_encoding)
    except UnicodeDecodeError:
        pass

    try:
        # chardet is an optional dependency, hence the local import
        import chardet
        encoding = chardet.detect(str_)['encoding']
        # A None encoding means chardet could not guess. Fall through to
        # the lossy decode instead of raising: UnicodeDecodeError cannot
        # be constructed without arguments.
        if encoding is not None:
            return str_.decode(encoding)
    except (ImportError, UnicodeDecodeError, LookupError):
        # LookupError: chardet reported an encoding this Python has no
        # codec for.
        pass

    return unicode(str_, from_encoding, 'replace')
180 189
def safe_str(unicode_, to_encoding='utf8'):
    """
    safe str function. Turns ``unicode_`` into a string without raising
    encode errors.

    Encoding is attempted in three stages:

    1. plain ``str()`` conversion
    2. strict encoding using the encoding guessed by the ``chardet``
       library (skipped when chardet is not installed, rejects the
       input, cannot guess, or guesses an encoding that fails)
    3. encoding using ``to_encoding`` with unencodable characters
       replaced

    :param unicode_: unicode to encode, str objects are returned unchanged
    :param to_encoding: encoding used for the final fallback with errors
        replaced
    :rtype: str
    :returns: str object
    """

    if isinstance(unicode_, str):
        return unicode_

    try:
        return str(unicode_)
    except UnicodeEncodeError:
        pass

    try:
        # chardet is an optional dependency, hence the local import
        import chardet
        encoding = chardet.detect(unicode_)['encoding']
        # A None encoding means chardet could not guess. Fall through to
        # the lossy encode instead of raising: UnicodeEncodeError cannot
        # be constructed without arguments.
        if encoding is not None:
            return unicode_.encode(encoding)
    except (ImportError, UnicodeEncodeError, LookupError,
            TypeError, ValueError):
        # TypeError/ValueError: chardet releases that only accept byte
        # strings reject unicode input.
        # LookupError: chardet reported an encoding this Python has no
        # codec for.
        pass

    return unicode_.encode(to_encoding, 'replace')
200 222
General Comments 0
You need to be logged in to leave comments. Login now