diff --git a/pylons_app/lib/helpers.py b/pylons_app/lib/helpers.py
--- a/pylons_app/lib/helpers.py
+++ b/pylons_app/lib/helpers.py
@@ -336,3 +336,26 @@ def gravatar_url(email_address, size=30)
     gravatar_url += urllib.urlencode({'d':default, 's':str(size)})
 
     return gravatar_url
+
+def safe_unicode(str):
+    """safe unicode function. In case of UnicodeDecode error we try to return
+    unicode with errors replace, if this fails we return unicode with
+    string_escape decoding
+
+    The parameter shadows the ``str`` builtin (the name is kept for
+    callers), so the builtin is reached through ``__builtin__`` below.
+    """
+    import __builtin__
+
+    try:
+        u_str = unicode(str)
+    except UnicodeDecodeError:
+        try:
+            u_str = unicode(str, 'utf-8', 'replace')
+        except (UnicodeDecodeError, TypeError):
+            # unicode() with an explicit encoding raises TypeError when the
+            # value is not a byte string/buffer; in case of such an error
+            # just represent it as an escaped byte string
+            u_str = unicode(__builtin__.str(str).encode('string_escape'))
+
+    return u_str
diff --git a/pylons_app/lib/indexers/daemon.py b/pylons_app/lib/indexers/daemon.py
--- a/pylons_app/lib/indexers/daemon.py
+++ b/pylons_app/lib/indexers/daemon.py
@@ -36,6 +36,7 @@ from pidlock import LockHeld, DaemonLock
 import traceback
 from pylons_app.config.environment import load_environment
 from pylons_app.model.hg_model import HgModel
+from pylons_app.lib.helpers import safe_unicode
 from whoosh.index import create_in, open_dir
 from shutil import rmtree
 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
@@ -77,11 +78,7 @@ class WhooshIndexingDaemon(object):
             fobj = open(path, 'rb')
             content = fobj.read()
             fobj.close()
-            try:
-                u_content = unicode(content)
-            except UnicodeDecodeError:
-                #incase we have a decode error just represent as byte string
-                u_content = unicode(str(content).encode('string_escape'))
+            u_content = safe_unicode(content)
         else:
             log.debug(' >> %s' % path)
             #just index file name without it's content