##// END OF EJS Templates
extended safe_str and safe_unicode with chardet fallback....
marcink -
r1490:76b358f8 beta
parent child Browse files
Show More
@@ -157,44 +157,66 b' def generate_api_key(username, salt=None'
157 157 return hashlib.sha1(username + salt).hexdigest()
158 158
159 159
160 def safe_unicode(_str, from_encoding='utf8'):
160 def safe_unicode(str_, from_encoding='utf8'):
161 161 """
162 safe unicode function. In case of UnicodeDecode error we try to return
163 unicode with errors replaced
162 safe unicode function. Does a few tricks to turn str_ into unicode
163
164 In case of UnicodeDecodeError we try to return it with the encoding detected
165 by the chardet library; if that fails, fall back to unicode with errors replaced
164 166
165 :param _str: string to decode
167 :param str_: string to decode
166 168 :rtype: unicode
167 169 :returns: unicode object
168 170 """
169 171
170 if isinstance(_str, unicode):
171 return _str
172 if isinstance(str_, unicode):
173 return str_
172 174
173 175 try:
174 u_str = unicode(_str, from_encoding)
176 return unicode(str_, from_encoding)
175 177 except UnicodeDecodeError:
176 u_str = unicode(_str, from_encoding, 'replace')
177
178 return u_str
179
178 pass
179
180 try:
181 import chardet
182 encoding = chardet.detect(str_)['encoding']
183 if encoding is None:
184 raise UnicodeDecodeError()
185
186 return str_.decode(encoding)
187 except (ImportError, UnicodeDecodeError):
188 return unicode(str_, from_encoding, 'replace')
180 189
181 def safe_str(_unicode, to_encoding='utf8'):
190 def safe_str(unicode_, to_encoding='utf8'):
182 191 """
183 safe str function. In case of UnicodeEncode error we try to return
184 str with errors replaced
192 safe str function. Does a few tricks to turn unicode_ into string
193
194 In case of UnicodeEncodeError we try to return it with the encoding detected
195 by the chardet library; if that fails, fall back to string with errors replaced
185 196
186 :param _unicode: unicode to encode
197 :param unicode_: unicode to encode
187 198 :rtype: str
188 199 :returns: str object
189 200 """
190 201
191 if isinstance(_unicode, str):
192 return _unicode
202 if isinstance(unicode_, str):
203 return unicode_
193 204
194 205 try:
195 safe_str = str(_unicode)
206 return str(unicode_)
196 207 except UnicodeEncodeError:
197 safe_str = _unicode.encode(to_encoding, 'replace')
208 pass
209
210 try:
211 import chardet
212 encoding = chardet.detect(unicode_)['encoding']
213 print encoding
214 if encoding is None:
215 raise UnicodeEncodeError()
216
217 return unicode_.encode(encoding)
218 except (ImportError, UnicodeEncodeError):
219 return unicode_.encode(to_encoding, 'replace')
198 220
199 221 return safe_str
200 222
General Comments 0
You need to be logged in to leave comments. Login now