##// END OF EJS Templates
extended safe_str and safe_unicode with chardet fallback....
marcink -
r1490:76b358f8 beta
parent child Browse files
Show More
@@ -157,44 +157,66 b' def generate_api_key(username, salt=None'
157 return hashlib.sha1(username + salt).hexdigest()
157 return hashlib.sha1(username + salt).hexdigest()
158
158
159
159
def safe_unicode(str_, from_encoding='utf8'):
    """
    Safe unicode function. Does a few tricks to turn str_ into unicode.

    Decoding is first attempted with ``from_encoding``. In case of a
    UnicodeDecodeError the encoding detected by the chardet library is
    tried; if chardet is not installed, detects no encoding, or its guess
    cannot be used, fall back to ``from_encoding`` with errors replaced.

    :param str_: string to decode
    :param from_encoding: encoding tried first and used for the fallback
    :rtype: unicode
    :returns: unicode object
    """

    if isinstance(str_, unicode):
        return str_

    try:
        return unicode(str_, from_encoding)
    except UnicodeDecodeError:
        pass

    try:
        # chardet is optional, so it is imported lazily and only on the
        # slow path where the plain decode has already failed
        import chardet
        encoding = chardet.detect(str_)['encoding']
        # No exception is raised for "nothing detected": UnicodeDecodeError
        # cannot be constructed without arguments (doing so raises
        # TypeError), so simply fall through to the replace fallback.
        if encoding is not None:
            return str_.decode(encoding)
    except (ImportError, UnicodeDecodeError, LookupError):
        # LookupError covers a detected encoding name that Python has no
        # codec for
        pass

    return unicode(str_, from_encoding, 'replace')
180
189
def safe_str(unicode_, to_encoding='utf8'):
    """
    Safe str function. Does a few tricks to turn unicode_ into a string.

    A plain ``str()`` conversion is attempted first. In case of a
    UnicodeEncodeError the encoding detected by the chardet library is
    tried; if chardet is not installed, detects no encoding, or its guess
    cannot be used, fall back to ``to_encoding`` with errors replaced.

    :param unicode_: unicode to encode
    :param to_encoding: encoding used for the fallback with errors replaced
    :rtype: str
    :returns: str object
    """

    if isinstance(unicode_, str):
        return unicode_

    try:
        return str(unicode_)
    except UnicodeEncodeError:
        pass

    try:
        # chardet is optional, so it is imported lazily and only on the
        # slow path where the plain conversion has already failed
        import chardet
        # NOTE(review): chardet.detect is handed a unicode object here;
        # confirm the installed chardet version accepts that input.
        encoding = chardet.detect(unicode_)['encoding']
        # No exception is raised for "nothing detected": UnicodeEncodeError
        # cannot be constructed without arguments (doing so raises
        # TypeError), so simply fall through to the replace fallback.
        if encoding is not None:
            return unicode_.encode(encoding)
    except (ImportError, UnicodeEncodeError, LookupError):
        # LookupError covers a detected encoding name that Python has no
        # codec for
        pass

    return unicode_.encode(to_encoding, 'replace')
200
222
General Comments 0
You need to be logged in to leave comments. Login now