# HG changeset patch # User Marcin Kuzminski # Date 2020-03-16 14:08:09 # Node ID dc8df3e0e1bf84df3c58a7307168f855d4455479 # Parent 58907ccac5c76a5054b422753067251f26712589 git: use non-unicode author extraction, as the author is returned as bytes from the backend and we can get unicode errors when it contains non-ASCII characters. diff --git a/vcsserver/git.py b/vcsserver/git.py --- a/vcsserver/git.py +++ b/vcsserver/git.py @@ -39,7 +39,7 @@ from dulwich.repo import Repo as Dulwich from dulwich.server import update_server_info from vcsserver import exceptions, settings, subprocessio -from vcsserver.utils import safe_str, safe_int +from vcsserver.utils import safe_str, safe_int, safe_unicode from vcsserver.base import RepoFactory, obfuscate_qs from vcsserver.hgcompat import ( hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler) @@ -840,7 +840,11 @@ class GitRemote(RemoteBase): if author.email: return u"{} <{}>".format(author.name, author.email) - return u"{}".format(author.raw_name) + try: + return u"{}".format(author.name) + except Exception: + return u"{}".format(safe_unicode(author.raw_name)) + return _author(repo_id, commit_id) @reraise_safe_exceptions diff --git a/vcsserver/utils.py b/vcsserver/utils.py --- a/vcsserver/utils.py +++ b/vcsserver/utils.py @@ -37,18 +37,16 @@ def safe_int(val, default=None): return val -def safe_str(unicode_, to_encoding=['utf8']): +def safe_str(unicode_, to_encoding=None): """ safe str function. 
Does few trick to turn unicode_ into string - In case of UnicodeEncodeError, we try to return it with encoding detected - by chardet library if it fails fallback to string with errors replaced - :param unicode_: unicode to encode + :param to_encoding: encode to this encoding, UTF8 by default :rtype: str :returns: str object """ - + to_encoding = to_encoding or ['utf8'] # if it's not basestr cast to str if not isinstance(unicode_, basestring): return str(unicode_) @@ -65,15 +63,38 @@ def safe_str(unicode_, to_encoding=['utf except UnicodeEncodeError: pass + return unicode_.encode(to_encoding[0], 'replace') + + +def safe_unicode(str_, from_encoding=None): + """ + safe unicode function. Does a few tricks to turn str_ into unicode + + :param str_: string to decode + :param from_encoding: decode from this encoding, UTF8 by default + :rtype: unicode + :returns: unicode object + """ + from_encoding = from_encoding or ['utf8'] + + if isinstance(str_, unicode): + return str_ + + if not isinstance(from_encoding, (list, tuple)): + from_encoding = [from_encoding] + try: - import chardet - encoding = chardet.detect(unicode_)['encoding'] - if encoding is None: - raise UnicodeEncodeError() + return unicode(str_) + except UnicodeDecodeError: + pass - return unicode_.encode(encoding) - except (ImportError, UnicodeEncodeError): - return unicode_.encode(to_encoding[0], 'replace') + for enc in from_encoding: + try: + return unicode(str_, enc) + except UnicodeDecodeError: + pass + + return unicode(str_, from_encoding[0], 'replace') class AttributeDict(dict):