diff --git a/kallithea/config/routing.py b/kallithea/config/routing.py
--- a/kallithea/config/routing.py
+++ b/kallithea/config/routing.py
@@ -19,14 +19,37 @@ may take precedent over the more generic
 refer to the routes manual at http://routes.groovie.org/docs/
 """
 
-from routes import Mapper
+import routes
 from tg import request
 
+from kallithea.lib.utils2 import safe_str
+
 
 # prefix for non repository related links needs to be prefixed with `/`
 ADMIN_PREFIX = '/_admin'
 
 
+class Mapper(routes.Mapper):
+    """
+    Subclassed Mapper with routematch patched to decode "unicode" str url to
+    *real* unicode str before applying matches and invoking controller methods.
+    """
+
+    def routematch(self, url=None, environ=None):
+        """
+        routematch that also decodes url from "fake bytes" to real unicode
+        string before matching and invoking controllers.
+        """
+        # Process url like get_path_info does ... but PATH_INFO has already
+        # been retrieved from environ and is passed, so - let's just use that
+        # instead.
+        # url defaults to None; pass that through untouched and let the base
+        # class deal with it instead of failing on None.encode().
+        if url is not None:
+            url = safe_str(url.encode('latin1'))
+        return super().routematch(url=url, environ=environ)
+
+
 def make_map(config):
     """Create, configure and return the routes Mapper"""
     rmap = Mapper(directory=config['paths']['controllers'],
diff --git a/kallithea/lib/base.py b/kallithea/lib/base.py
--- a/kallithea/lib/base.py
+++ b/kallithea/lib/base.py
@@ -97,12 +97,17 @@ def _get_ip_addr(environ):
 
 
 def get_path_info(environ):
-    """Return unicode PATH_INFO from environ ... using tg.original_request if available.
+    """Return PATH_INFO from environ ... using tg.original_request if available.
+
+    In Python 3 WSGI, PATH_INFO is a unicode str, but kind of contains encoded
+    bytes. The code points are guaranteed to only use the lower 8 bits, and
+    encoding the string with the 1:1 encoding latin1 will give the
+    corresponding byte string ... which then can be decoded to proper unicode.
     """
     org_req = environ.get('tg.original_request')
     if org_req is not None:
         environ = org_req.environ
-    return safe_str(environ['PATH_INFO'])
+    return safe_str(environ['PATH_INFO'].encode('latin1'))
 
 
 def log_in_user(user, remember, is_external_auth, ip_addr):
diff --git a/kallithea/lib/middleware/permanent_repo_url.py b/kallithea/lib/middleware/permanent_repo_url.py
--- a/kallithea/lib/middleware/permanent_repo_url.py
+++ b/kallithea/lib/middleware/permanent_repo_url.py
@@ -33,9 +33,9 @@ class PermanentRepoUrl(object):
     def __call__(self, environ, start_response):
         # Extract path_info as get_path_info does, but do it explicitly because
         # we also have to do the reverse operation when patching it back in
-        path_info = safe_str(environ['PATH_INFO'])
+        path_info = safe_str(environ['PATH_INFO'].encode('latin1'))
         if path_info.startswith('/'): # it must
             path_info = '/' + fix_repo_id_name(path_info[1:])
-            environ['PATH_INFO'] = safe_bytes(path_info)
+            environ['PATH_INFO'] = safe_bytes(path_info).decode('latin1')
 
         return self.application(environ, start_response)