diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py --- a/mercurial/hgweb/hgwebdir_mod.py +++ b/mercurial/hgweb/hgwebdir_mod.py @@ -33,7 +33,6 @@ from .. import ( error, hg, profiling, - pycompat, scmutil, templater, ui as uimod, @@ -83,33 +82,6 @@ def urlrepos(prefix, roothead, paths): yield (prefix + '/' + util.pconvert(path[len(roothead):]).lstrip('/')).strip('/'), path -def geturlcgivars(baseurl, port): - """ - Extract CGI variables from baseurl - - >>> geturlcgivars(b"http://host.org/base", b"80") - ('host.org', '80', '/base') - >>> geturlcgivars(b"http://host.org:8000/base", b"80") - ('host.org', '8000', '/base') - >>> geturlcgivars(b'/base', 8000) - ('', '8000', '/base') - >>> geturlcgivars(b"base", b'8000') - ('', '8000', '/base') - >>> geturlcgivars(b"http://host", b'8000') - ('host', '8000', '/') - >>> geturlcgivars(b"http://host/", b'8000') - ('host', '8000', '/') - """ - u = util.url(baseurl) - name = u.host or '' - if u.port: - port = u.port - path = u.path or "" - if not path.startswith('/'): - path = '/' + path - - return name, pycompat.bytestr(port), path - def readallowed(ui, req): """Check allow_read and deny_read config options of a repo's ui object to determine user permissions. By default, with neither option set (or @@ -359,7 +331,6 @@ class hgwebdir(object): self.stripecount = self.ui.config('web', 'stripes') if self.stripecount: self.stripecount = int(self.stripecount) - self._baseurl = self.ui.config('web', 'baseurl') prefix = self.ui.config('web', 'prefix') if prefix.startswith('/'): prefix = prefix[1:] @@ -376,7 +347,8 @@ class hgwebdir(object): wsgicgi.launch(self) def __call__(self, env, respond): - wsgireq = requestmod.wsgirequest(env, respond) + baseurl = self.ui.config('web', 'baseurl') + wsgireq = requestmod.wsgirequest(env, respond, altbaseurl=baseurl) return self.run_wsgi(wsgireq) def run_wsgi(self, wsgireq): @@ -455,7 +427,8 @@ class hgwebdir(object): # Re-parse the WSGI environment to take into account our # repository path component. wsgireq.req = requestmod.parserequestfromenv( - wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo) + wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo, + altbaseurl=self.ui.config('web', 'baseurl')) try: # ensure caller gets private copy of ui repo = hg.repository(self.ui.copy(), real) @@ -502,7 +475,6 @@ class hgwebdir(object): for column in sortable] self.refresh() - self.updatereqenv(wsgireq.env) entries = indexentries(self.ui, self.repos, wsgireq, req, self.stripecount, sortcolumn=sortcolumn, @@ -524,8 +496,6 @@ class hgwebdir(object): def config(section, name, default=uimod._unset, untrusted=True): return self.ui.config(section, name, default, untrusted) - self.updatereqenv(wsgireq.env) - url = wsgireq.env.get('SCRIPT_NAME', '') if not url.endswith('/'): url += '/' @@ -557,10 +527,3 @@ class hgwebdir(object): } tmpl = templater.templater.frommapfile(mapfile, defaults=defaults) return tmpl - - def updatereqenv(self, env): - if self._baseurl is not None: - name, port, path = geturlcgivars(self._baseurl, env['SERVER_PORT']) - env['SERVER_NAME'] = name - env['SERVER_PORT'] = port - env['SCRIPT_NAME'] = path diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py --- a/mercurial/hgweb/request.py +++ b/mercurial/hgweb/request.py @@ -157,7 +157,7 @@ class parsedrequest(object): # Request body input stream. bodyfh = attr.ib() -def parserequestfromenv(env, bodyfh, reponame=None): +def parserequestfromenv(env, bodyfh, reponame=None, altbaseurl=None): """Parse URL components from environment variables. WSGI defines request attributes via environment variables. This function @@ -167,8 +167,18 @@ def parserequestfromenv(env, bodyfh, rep string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``. This simulates the world view of a WSGI application that processes requests from the base URL of a repo. + + If ``altbaseurl`` (typically comes from ``web.baseurl`` config option) + is defined, it is used - instead of the WSGI environment variables - for + constructing URL components up to and including the WSGI application path. + For example, if the current WSGI application is at ``/repo`` and a request + is made to ``/rev/@`` with this argument set to + ``http://myserver:9000/prefix``, the URL and path components will resolve as + if the request were to ``http://myserver:9000/prefix/rev/@``. In other + words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and + ``SCRIPT_NAME`` are all effectively replaced by components from this URL. """ - # PEP-0333 defines the WSGI spec and is a useful reference for this code. + # PEP 3333 defines the WSGI spec and is a useful reference for this code. # We first validate that the incoming object conforms with the WSGI spec. # We only want to be dealing with spec-conforming WSGI implementations. @@ -184,20 +194,27 @@ def parserequestfromenv(env, bodyfh, rep env = {k: v.encode('latin-1') if isinstance(v, str) else v for k, v in env.iteritems()} + if altbaseurl: + altbaseurl = util.url(altbaseurl) + # https://www.python.org/dev/peps/pep-0333/#environ-variables defines # the environment variables. # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines # how URLs are reconstructed. fullurl = env['wsgi.url_scheme'] + '://' - advertisedfullurl = fullurl + + if altbaseurl and altbaseurl.scheme: + advertisedfullurl = altbaseurl.scheme + '://' + else: + advertisedfullurl = fullurl - def addport(s): - if env['wsgi.url_scheme'] == 'https': - if env['SERVER_PORT'] != '443': - s += ':' + env['SERVER_PORT'] + def addport(s, port): + if s.startswith('https://'): + if port != '443': + s += ':' + port else: - if env['SERVER_PORT'] != '80': - s += ':' + env['SERVER_PORT'] + if port != '80': + s += ':' + port return s @@ -205,17 +222,39 @@ def parserequestfromenv(env, bodyfh, rep fullurl += env['HTTP_HOST'] else: fullurl += env['SERVER_NAME'] - fullurl = addport(fullurl) + fullurl = addport(fullurl, env['SERVER_PORT']) + + if altbaseurl and altbaseurl.host: + advertisedfullurl += altbaseurl.host - advertisedfullurl += env['SERVER_NAME'] - advertisedfullurl = addport(advertisedfullurl) + if altbaseurl.port: + port = altbaseurl.port + elif altbaseurl.scheme == 'http' and not altbaseurl.port: + port = '80' + elif altbaseurl.scheme == 'https' and not altbaseurl.port: + port = '443' + else: + port = env['SERVER_PORT'] + + advertisedfullurl = addport(advertisedfullurl, port) + else: + advertisedfullurl += env['SERVER_NAME'] + advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT']) baseurl = fullurl advertisedbaseurl = advertisedfullurl fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', '')) - advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', '')) fullurl += util.urlreq.quote(env.get('PATH_INFO', '')) + + if altbaseurl: + path = altbaseurl.path or '' + if path and not path.startswith('/'): + path = '/' + path + advertisedfullurl += util.urlreq.quote(path) + else: + advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', '')) + advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', '')) if env.get('QUERY_STRING'): @@ -226,7 +265,12 @@ def parserequestfromenv(env, bodyfh, rep # that represents the repository being dispatched to. When computing # the dispatch info, we ignore these leading path components. - apppath = env.get('SCRIPT_NAME', '') + if altbaseurl: + apppath = altbaseurl.path or '' + if apppath and not apppath.startswith('/'): + apppath = '/' + apppath + else: + apppath = env.get('SCRIPT_NAME', '') if reponame: repoprefix = '/' + reponame.strip('/') @@ -545,7 +589,7 @@ class wsgirequest(object): instantiate instances of this class, which provides higher-level APIs for obtaining request parameters, writing HTTP output, etc. """ - def __init__(self, wsgienv, start_response): + def __init__(self, wsgienv, start_response, altbaseurl=None): version = wsgienv[r'wsgi.version'] if (version < (1, 0)) or (version >= (2, 0)): raise RuntimeError("Unknown and unsupported WSGI version %d.%d" @@ -563,7 +607,7 @@ class wsgirequest(object): self.multiprocess = wsgienv[r'wsgi.multiprocess'] self.run_once = wsgienv[r'wsgi.run_once'] self.env = wsgienv - self.req = parserequestfromenv(wsgienv, inp) + self.req = parserequestfromenv(wsgienv, inp, altbaseurl=altbaseurl) self.res = wsgiresponse(self.req, start_response) self._start_response = start_response self.server_write = None diff --git a/tests/test-wsgirequest.py b/tests/test-wsgirequest.py --- a/tests/test-wsgirequest.py +++ b/tests/test-wsgirequest.py @@ -23,11 +23,12 @@ DEFAULT_ENV = { r'wsgi.run_once': False, } -def parse(env, bodyfh=None, reponame=None, extra=None): +def parse(env, bodyfh=None, reponame=None, altbaseurl=None, extra=None): env = dict(env) env.update(extra or {}) - return requestmod.parserequestfromenv(env, bodyfh, reponame=reponame) + return requestmod.parserequestfromenv(env, bodyfh, reponame=reponame, + altbaseurl=altbaseurl) class ParseRequestTests(unittest.TestCase): def testdefault(self): @@ -242,6 +243,174 @@ class ParseRequestTests(unittest.TestCas self.assertEqual(r.dispatchpath, b'path1/path2') self.assertEqual(r.reponame, b'prefix/repo') + def testaltbaseurl(self): + # Simple hostname remap. + r = parse(DEFAULT_ENV, altbaseurl='http://altserver') + + self.assertEqual(r.url, b'http://testserver') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, b'http://altserver') + self.assertEqual(r.advertisedbaseurl, b'http://altserver') + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'') + self.assertEqual(r.dispatchparts, []) + self.assertIsNone(r.dispatchpath) + self.assertIsNone(r.reponame) + + # With a custom port. + r = parse(DEFAULT_ENV, altbaseurl='http://altserver:8000') + self.assertEqual(r.url, b'http://testserver') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, b'http://altserver:8000') + self.assertEqual(r.advertisedbaseurl, b'http://altserver:8000') + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'') + self.assertEqual(r.dispatchparts, []) + self.assertIsNone(r.dispatchpath) + self.assertIsNone(r.reponame) + + # With a changed protocol. + r = parse(DEFAULT_ENV, altbaseurl='https://altserver') + self.assertEqual(r.url, b'http://testserver') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, b'https://altserver') + self.assertEqual(r.advertisedbaseurl, b'https://altserver') + # URL scheme is defined as the actual scheme, not advertised. + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'') + self.assertEqual(r.dispatchparts, []) + self.assertIsNone(r.dispatchpath) + self.assertIsNone(r.reponame) + + # Need to specify explicit port number for proper https:// alt URLs. + r = parse(DEFAULT_ENV, altbaseurl='https://altserver:443') + self.assertEqual(r.url, b'http://testserver') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, b'https://altserver') + self.assertEqual(r.advertisedbaseurl, b'https://altserver') + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'') + self.assertEqual(r.dispatchparts, []) + self.assertIsNone(r.dispatchpath) + self.assertIsNone(r.reponame) + + # With only PATH_INFO defined. + r = parse(DEFAULT_ENV, altbaseurl='http://altserver', extra={ + r'PATH_INFO': r'/path1/path2', + }) + self.assertEqual(r.url, b'http://testserver/path1/path2') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, b'http://altserver/path1/path2') + self.assertEqual(r.advertisedbaseurl, b'http://altserver') + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'') + self.assertEqual(r.dispatchparts, [b'path1', b'path2']) + self.assertEqual(r.dispatchpath, b'path1/path2') + self.assertIsNone(r.reponame) + + # Path on alt URL. + r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath') + self.assertEqual(r.url, b'http://testserver') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, b'http://altserver/altpath') + self.assertEqual(r.advertisedbaseurl, b'http://altserver') + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'/altpath') + self.assertEqual(r.dispatchparts, []) + self.assertIsNone(r.dispatchpath) + self.assertIsNone(r.reponame) + + # With a trailing slash. + r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath/') + self.assertEqual(r.url, b'http://testserver') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, b'http://altserver/altpath/') + self.assertEqual(r.advertisedbaseurl, b'http://altserver') + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'/altpath/') + self.assertEqual(r.dispatchparts, []) + self.assertIsNone(r.dispatchpath) + self.assertIsNone(r.reponame) + + # PATH_INFO + path on alt URL. + r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath', extra={ + r'PATH_INFO': r'/path1/path2', + }) + self.assertEqual(r.url, b'http://testserver/path1/path2') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, + b'http://altserver/altpath/path1/path2') + self.assertEqual(r.advertisedbaseurl, b'http://altserver') + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'/altpath') + self.assertEqual(r.dispatchparts, [b'path1', b'path2']) + self.assertEqual(r.dispatchpath, b'path1/path2') + self.assertIsNone(r.reponame) + + # PATH_INFO + path on alt URL with trailing slash. + r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath/', extra={ + r'PATH_INFO': r'/path1/path2', + }) + self.assertEqual(r.url, b'http://testserver/path1/path2') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, + b'http://altserver/altpath//path1/path2') + self.assertEqual(r.advertisedbaseurl, b'http://altserver') + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'/altpath/') + self.assertEqual(r.dispatchparts, [b'path1', b'path2']) + self.assertEqual(r.dispatchpath, b'path1/path2') + self.assertIsNone(r.reponame) + + # Local SCRIPT_NAME is ignored. + r = parse(DEFAULT_ENV, altbaseurl='http://altserver', extra={ + r'SCRIPT_NAME': r'/script', + r'PATH_INFO': r'/path1/path2', + }) + self.assertEqual(r.url, b'http://testserver/script/path1/path2') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, b'http://altserver/path1/path2') + self.assertEqual(r.advertisedbaseurl, b'http://altserver') + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'') + self.assertEqual(r.dispatchparts, [b'path1', b'path2']) + self.assertEqual(r.dispatchpath, b'path1/path2') + self.assertIsNone(r.reponame) + + # Use remote's path for script name, app path + r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altroot', extra={ + r'SCRIPT_NAME': r'/script', + r'PATH_INFO': r'/path1/path2', + }) + self.assertEqual(r.url, b'http://testserver/script/path1/path2') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, + b'http://altserver/altroot/path1/path2') + self.assertEqual(r.advertisedbaseurl, b'http://altserver') + self.assertEqual(r.urlscheme, b'http') + self.assertEqual(r.apppath, b'/altroot') + self.assertEqual(r.dispatchparts, [b'path1', b'path2']) + self.assertEqual(r.dispatchpath, b'path1/path2') + self.assertIsNone(r.reponame) + + # reponame is factored in properly. + r = parse(DEFAULT_ENV, reponame=b'repo', + altbaseurl='http://altserver/altroot', + extra={ + r'SCRIPT_NAME': r'/script', + r'PATH_INFO': r'/repo/path1/path2', + }) + + self.assertEqual(r.url, b'http://testserver/script/repo/path1/path2') + self.assertEqual(r.baseurl, b'http://testserver') + self.assertEqual(r.advertisedurl, + b'http://altserver/altroot/repo/path1/path2') + self.assertEqual(r.advertisedbaseurl, b'http://altserver') + self.assertEqual(r.apppath, b'/altroot/repo') + self.assertEqual(r.dispatchparts, [b'path1', b'path2']) + self.assertEqual(r.dispatchpath, b'path1/path2') + self.assertEqual(r.reponame, b'repo') + if __name__ == '__main__': import silenttestrunner silenttestrunner.main(__name__)