# HG changeset patch # User Marcin Kuzminski # Date 2012-11-14 23:57:52 # Node ID 6e76b4892d727f60a1406db6a8152841cce5457d # Parent b13ca18ac5279a6db37fb6d9a43b64a6dc83f657 Implemented #647, option to pass list of default encoding used to encode to/decode from unicode diff --git a/development.ini b/development.ini --- a/development.ini +++ b/development.ini @@ -76,6 +76,8 @@ use_gravatar = true container_auth_enabled = false proxypass_auth_enabled = false +## default encoding used to convert from and to unicode +## can also be a comma separated list of encodings in case of mixed encodings default_encoding = utf8 ## overwrite schema of clone url diff --git a/production.ini b/production.ini --- a/production.ini +++ b/production.ini @@ -76,6 +76,8 @@ use_gravatar = true container_auth_enabled = false proxypass_auth_enabled = false +## default encoding used to convert from and to unicode +## can also be a comma separated list of encodings in case of mixed encodings default_encoding = utf8 ## overwrite schema of clone url diff --git a/rhodecode/config/deployment.ini_tmpl b/rhodecode/config/deployment.ini_tmpl --- a/rhodecode/config/deployment.ini_tmpl +++ b/rhodecode/config/deployment.ini_tmpl @@ -76,6 +76,8 @@ use_gravatar = true container_auth_enabled = false proxypass_auth_enabled = false +## default encoding used to convert from and to unicode +## can also be a comma separated list of encodings in case of mixed encodings default_encoding = utf8 ## overwrite schema of clone url diff --git a/rhodecode/lib/utils2.py b/rhodecode/lib/utils2.py --- a/rhodecode/lib/utils2.py +++ b/rhodecode/lib/utils2.py @@ -66,6 +66,7 @@ def __get_lem(): return dict(d) + def str2bool(_str): """ returs True/False value from given string, it tries to translate the @@ -83,6 +84,27 @@ def str2bool(_str): return _str in ('t', 'true', 'y', 'yes', 'on', '1') +def aslist(obj, sep=None, strip=True): + """ + Returns given string separated by sep as list + + :param obj: + :param sep: + :param strip: + 
""" + if isinstance(obj, (basestring)): + lst = obj.split(sep) + if strip: + lst = [v.strip() for v in lst] + return lst + elif isinstance(obj, (list, tuple)): + return obj + elif obj is None: + return [] + else: + return [obj] + + def convert_line_endings(line, mode): """ Converts a given line "line end" accordingly to given mode @@ -182,18 +204,23 @@ def safe_unicode(str_, from_encoding=Non if not from_encoding: import rhodecode - DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8') - from_encoding = DEFAULT_ENCODING + DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding', + 'utf8'), sep=',') + from_encoding = DEFAULT_ENCODINGS + + if not isinstance(from_encoding, (list, tuple)): + from_encoding = [from_encoding] try: return unicode(str_) except UnicodeDecodeError: pass - try: - return unicode(str_, from_encoding) - except UnicodeDecodeError: - pass + for enc in from_encoding: + try: + return unicode(str_, enc) + except UnicodeDecodeError: + pass try: import chardet @@ -202,7 +229,7 @@ def safe_unicode(str_, from_encoding=Non raise Exception() return str_.decode(encoding) except (ImportError, UnicodeDecodeError, Exception): - return unicode(str_, from_encoding, 'replace') + return unicode(str_, from_encoding[0], 'replace') def safe_str(unicode_, to_encoding=None): @@ -226,13 +253,18 @@ def safe_str(unicode_, to_encoding=None) if not to_encoding: import rhodecode - DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8') - to_encoding = DEFAULT_ENCODING + DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding', + 'utf8'), sep=',') + to_encoding = DEFAULT_ENCODINGS - try: - return unicode_.encode(to_encoding) - except UnicodeEncodeError: - pass + if not isinstance(to_encoding, (list, tuple)): + to_encoding = [to_encoding] + + for enc in to_encoding: + try: + return unicode_.encode(enc) + except UnicodeEncodeError: + pass try: import chardet @@ -242,7 +274,7 @@ def safe_str(unicode_, to_encoding=None) return 
unicode_.encode(encoding) except (ImportError, UnicodeEncodeError): - return unicode_.encode(to_encoding, 'replace') + return unicode_.encode(to_encoding[0], 'replace') return safe_str diff --git a/rhodecode/lib/vcs/utils/__init__.py b/rhodecode/lib/vcs/utils/__init__.py --- a/rhodecode/lib/vcs/utils/__init__.py +++ b/rhodecode/lib/vcs/utils/__init__.py @@ -38,12 +38,12 @@ def safe_unicode(str_, from_encoding=Non :rtype: unicode :returns: unicode object """ + from rhodecode.lib.utils2 import safe_unicode + return safe_unicode(str_, from_encoding) + if isinstance(str_, unicode): return str_ - if not from_encoding: - import rhodecode - DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8') - from_encoding = DEFAULT_ENCODING + try: return unicode(str_) except UnicodeDecodeError: @@ -75,13 +75,12 @@ def safe_str(unicode_, to_encoding=None) :rtype: str :returns: str object """ + from rhodecode.lib.utils2 import safe_str + return safe_str(unicode_, to_encoding) if isinstance(unicode_, str): return unicode_ - if not to_encoding: - import rhodecode - DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8') - to_encoding = DEFAULT_ENCODING + try: return unicode_.encode(to_encoding) except UnicodeEncodeError: