helpers: unicode/str add flag to control usage of chardet....
marcink -
r4109:19a6ab7e default
Not Reviewed
Show More
Add another comment
TODOs: 0 unresolved 0 Resolved
COMMENTS: 0 General 0 Inline
@@ -207,7 +207,7
207 return val
207 return val
208
208
209
209
210 def safe_unicode(str_, from_encoding=None):
210 def safe_unicode(str_, from_encoding=None, use_chardet=False):
211 """
211 """
212 safe unicode function. Does a few tricks to turn str_ into unicode
212 safe unicode function. Does a few tricks to turn str_ into unicode
213
213
@@ -240,17 +240,19
240 except UnicodeDecodeError:
240 except UnicodeDecodeError:
241 pass
241 pass
242
242
243 try:
243 if use_chardet:
244 import chardet
244 try:
245 encoding = chardet.detect(str_)['encoding']
245 import chardet
246 if encoding is None:
246 encoding = chardet.detect(str_)['encoding']
247 raise Exception()
247 if encoding is None:
248 return str_.decode(encoding)
248 raise Exception()
249 except (ImportError, UnicodeDecodeError, Exception):
249 return str_.decode(encoding)
250 except (ImportError, UnicodeDecodeError, Exception):
251 return unicode(str_, from_encoding[0], 'replace')
252 else:
250 return unicode(str_, from_encoding[0], 'replace')
253 return unicode(str_, from_encoding[0], 'replace')
251
254
252
255 def safe_str(unicode_, to_encoding=None, use_chardet=False):
253 def safe_str(unicode_, to_encoding=None):
254 """
256 """
255 safe str function. Does a few tricks to turn unicode_ into string
257 safe str function. Does a few tricks to turn unicode_ into string
256
258
@@ -283,14 +285,17
283 except UnicodeEncodeError:
285 except UnicodeEncodeError:
284 pass
286 pass
285
287
286 try:
288 if use_chardet:
287 import chardet
289 try:
288 encoding = chardet.detect(unicode_)['encoding']
290 import chardet
289 if encoding is None:
291 encoding = chardet.detect(unicode_)['encoding']
290 raise UnicodeEncodeError()
292 if encoding is None:
293 raise UnicodeEncodeError()
291
294
292 return unicode_.encode(encoding)
295 return unicode_.encode(encoding)
293 except (ImportError, UnicodeEncodeError):
296 except (ImportError, UnicodeEncodeError):
297 return unicode_.encode(to_encoding[0], 'replace')
298 else:
294 return unicode_.encode(to_encoding[0], 'replace')
299 return unicode_.encode(to_encoding[0], 'replace')
295
300
296
301
Comments 0
You need to be logged in to leave comments. Login now