##// END OF EJS Templates
added safe unicode function, and implemented it in whoosh indexer
marcink -
r443:e5157e2a default
parent child Browse files
Show More
@@ -336,3 +336,19 b' def gravatar_url(email_address, size=30)'
336 336 gravatar_url += urllib.urlencode({'d':default, 's':str(size)})
337 337
338 338 return gravatar_url
339
def safe_unicode(str):
    """Return ``str`` as a unicode string without raising UnicodeDecodeError.

    Conversion is attempted in this order:

    1. Unicode input is returned unchanged.
    2. Byte strings are decoded strictly with the interpreter's default
       encoding.
    3. If that fails, they are decoded as UTF-8 with undecodable bytes
       replaced by U+FFFD. Decoding with ``'replace'`` cannot fail, so no
       further fallback is needed.

    Any other object is converted through its normal text representation.

    :param str: byte string, unicode string or arbitrary object to convert
    :returns: the unicode representation of the input
    """
    # The parameter is named ``str`` (kept so keyword callers still work),
    # which hides the builtin inside this function. Bind it to a clearer
    # local and derive the text/byte types from literals instead of names.
    value = str
    text_type = type(u'')
    byte_type = type(u''.encode('ascii'))

    if isinstance(value, text_type):
        return value

    if not isinstance(value, byte_type):
        try:
            return text_type(value)
        except UnicodeDecodeError:
            # Python 2 only: the object's byte-string form holds non-ASCII
            # data; take those bytes and decode them leniently below.
            value = byte_type(value)

    try:
        # No codec given: strict decode with the default encoding.
        return value.decode()
    except UnicodeDecodeError:
        # 'replace' substitutes U+FFFD for bad bytes and never raises.
        return value.decode('utf-8', 'replace')
@@ -36,6 +36,7 b' from pidlock import LockHeld, DaemonLock'
36 36 import traceback
37 37 from pylons_app.config.environment import load_environment
38 38 from pylons_app.model.hg_model import HgModel
39 from pylons_app.lib.helpers import safe_unicode
39 40 from whoosh.index import create_in, open_dir
40 41 from shutil import rmtree
41 42 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
@@ -77,11 +78,7 b' class WhooshIndexingDaemon(object):'
77 78 fobj = open(path, 'rb')
78 79 content = fobj.read()
79 80 fobj.close()
80 try:
81 u_content = unicode(content)
82 except UnicodeDecodeError:
83 #incase we have a decode error just represent as byte string
84 u_content = unicode(str(content).encode('string_escape'))
81 u_content = safe_unicode(content)
85 82 else:
86 83 log.debug(' >> %s' % path)
87 84 #just index file name without its content
General Comments 0
You need to be logged in to leave comments. Login now