##// END OF EJS Templates
Extended safe_str and safe_unicode with a chardet fallback.
marcink -
r1490:76b358f8 beta
parent child Browse files
Show More
@@ -157,44 +157,66 b' def generate_api_key(username, salt=None'
157 return hashlib.sha1(username + salt).hexdigest()
157 return hashlib.sha1(username + salt).hexdigest()
158
158
159
159
def safe_unicode(str_, from_encoding='utf8'):
    """
    safe unicode function. Does a few tricks to turn str_ into unicode

    Decoding is attempted in this order:

    1. strict decode using ``from_encoding``
    2. strict decode using the encoding guessed by the (optional) chardet
       library
    3. decode using ``from_encoding`` with undecodable bytes replaced

    :param str_: string to decode
    :param from_encoding: encoding tried first and used for the final
        fallback with errors replaced
    :rtype: unicode
    :returns: unicode object
    """
    # ``type(u'')`` is ``unicode`` on Python 2; resolving it this way avoids
    # naming the ``unicode`` builtin, which does not exist on Python 3.
    text_type = type(u'')

    if isinstance(str_, text_type):
        return str_

    try:
        return text_type(str_, from_encoding)
    except UnicodeDecodeError:
        pass

    try:
        # chardet is optional, hence the function-level import
        import chardet
        encoding = chardet.detect(str_)['encoding']
        # No guess available: fall through to the lossy decode below.
        # (Raising ``UnicodeDecodeError()`` without arguments would itself
        # fail with TypeError, because the constructor needs five arguments.)
        if encoding is not None:
            return str_.decode(encoding)
    except (ImportError, UnicodeDecodeError, LookupError):
        # LookupError: chardet named a codec this interpreter does not know
        pass

    return text_type(str_, from_encoding, 'replace')
180
189
def safe_str(unicode_, to_encoding='utf8'):
    """
    safe str function. Does a few tricks to turn unicode_ into string

    Conversion is attempted in this order:

    1. plain ``str(unicode_)``
    2. strict encode using the encoding guessed by the (optional) chardet
       library
    3. encode using ``to_encoding`` with unencodable characters replaced

    :param unicode_: unicode to encode
    :param to_encoding: encoding used for the final fallback with errors
        replaced
    :rtype: str
    :returns: str object
    """
    if isinstance(unicode_, str):
        return unicode_

    try:
        return str(unicode_)
    except UnicodeEncodeError:
        pass

    try:
        # chardet is optional, hence the function-level import
        import chardet
        # NOTE(review): chardet is designed to inspect byte strings; feeding
        # it text is kept from the original code, but some chardet releases
        # reject it with TypeError/ValueError -- handled below.
        encoding = chardet.detect(unicode_)['encoding']
        # No guess available: fall through to the lossy encode below.
        # (Raising ``UnicodeEncodeError()`` without arguments would itself
        # fail with TypeError, because the constructor needs five arguments.)
        if encoding is not None:
            return unicode_.encode(encoding)
    except (ImportError, LookupError, TypeError, ValueError):
        # ValueError also covers UnicodeEncodeError raised by ``encode``;
        # LookupError covers a codec name this interpreter does not know.
        pass

    return unicode_.encode(to_encoding, 'replace')
200
222
General Comments 0
You need to be logged in to leave comments. Login now