##// END OF EJS Templates
hgweb: support constructing URLs from an alternate base URL...
Gregory Szorc -
r36916:219b2335 default
parent child Browse files
Show More
@@ -33,7 +33,6 b' from .. import ('
33 33 error,
34 34 hg,
35 35 profiling,
36 pycompat,
37 36 scmutil,
38 37 templater,
39 38 ui as uimod,
@@ -83,33 +82,6 b' def urlrepos(prefix, roothead, paths):'
83 82 yield (prefix + '/' +
84 83 util.pconvert(path[len(roothead):]).lstrip('/')).strip('/'), path
85 84
86 def geturlcgivars(baseurl, port):
87 """
88 Extract CGI variables from baseurl
89
90 >>> geturlcgivars(b"http://host.org/base", b"80")
91 ('host.org', '80', '/base')
92 >>> geturlcgivars(b"http://host.org:8000/base", b"80")
93 ('host.org', '8000', '/base')
94 >>> geturlcgivars(b'/base', 8000)
95 ('', '8000', '/base')
96 >>> geturlcgivars(b"base", b'8000')
97 ('', '8000', '/base')
98 >>> geturlcgivars(b"http://host", b'8000')
99 ('host', '8000', '/')
100 >>> geturlcgivars(b"http://host/", b'8000')
101 ('host', '8000', '/')
102 """
103 u = util.url(baseurl)
104 name = u.host or ''
105 if u.port:
106 port = u.port
107 path = u.path or ""
108 if not path.startswith('/'):
109 path = '/' + path
110
111 return name, pycompat.bytestr(port), path
112
113 85 def readallowed(ui, req):
114 86 """Check allow_read and deny_read config options of a repo's ui object
115 87 to determine user permissions. By default, with neither option set (or
@@ -359,7 +331,6 b' class hgwebdir(object):'
359 331 self.stripecount = self.ui.config('web', 'stripes')
360 332 if self.stripecount:
361 333 self.stripecount = int(self.stripecount)
362 self._baseurl = self.ui.config('web', 'baseurl')
363 334 prefix = self.ui.config('web', 'prefix')
364 335 if prefix.startswith('/'):
365 336 prefix = prefix[1:]
@@ -376,7 +347,8 b' class hgwebdir(object):'
376 347 wsgicgi.launch(self)
377 348
378 349 def __call__(self, env, respond):
379 wsgireq = requestmod.wsgirequest(env, respond)
350 baseurl = self.ui.config('web', 'baseurl')
351 wsgireq = requestmod.wsgirequest(env, respond, altbaseurl=baseurl)
380 352 return self.run_wsgi(wsgireq)
381 353
382 354 def run_wsgi(self, wsgireq):
@@ -455,7 +427,8 b' class hgwebdir(object):'
455 427 # Re-parse the WSGI environment to take into account our
456 428 # repository path component.
457 429 wsgireq.req = requestmod.parserequestfromenv(
458 wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo)
430 wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo,
431 altbaseurl=self.ui.config('web', 'baseurl'))
459 432 try:
460 433 # ensure caller gets private copy of ui
461 434 repo = hg.repository(self.ui.copy(), real)
@@ -502,7 +475,6 b' class hgwebdir(object):'
502 475 for column in sortable]
503 476
504 477 self.refresh()
505 self.updatereqenv(wsgireq.env)
506 478
507 479 entries = indexentries(self.ui, self.repos, wsgireq, req,
508 480 self.stripecount, sortcolumn=sortcolumn,
@@ -524,8 +496,6 b' class hgwebdir(object):'
524 496 def config(section, name, default=uimod._unset, untrusted=True):
525 497 return self.ui.config(section, name, default, untrusted)
526 498
527 self.updatereqenv(wsgireq.env)
528
529 499 url = wsgireq.env.get('SCRIPT_NAME', '')
530 500 if not url.endswith('/'):
531 501 url += '/'
@@ -557,10 +527,3 b' class hgwebdir(object):'
557 527 }
558 528 tmpl = templater.templater.frommapfile(mapfile, defaults=defaults)
559 529 return tmpl
560
561 def updatereqenv(self, env):
562 if self._baseurl is not None:
563 name, port, path = geturlcgivars(self._baseurl, env['SERVER_PORT'])
564 env['SERVER_NAME'] = name
565 env['SERVER_PORT'] = port
566 env['SCRIPT_NAME'] = path
@@ -157,7 +157,7 b' class parsedrequest(object):'
157 157 # Request body input stream.
158 158 bodyfh = attr.ib()
159 159
160 def parserequestfromenv(env, bodyfh, reponame=None):
160 def parserequestfromenv(env, bodyfh, reponame=None, altbaseurl=None):
161 161 """Parse URL components from environment variables.
162 162
163 163 WSGI defines request attributes via environment variables. This function
@@ -167,8 +167,18 b' def parserequestfromenv(env, bodyfh, rep'
167 167 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
168 168 This simulates the world view of a WSGI application that processes
169 169 requests from the base URL of a repo.
170
171 If ``altbaseurl`` (which typically comes from the ``web.baseurl`` config
172 option) is defined, it is used instead of the WSGI environment variables
173 for constructing advertised URL components up to and including the WSGI
174 application path. For example, if the current WSGI application is at
175 ``/repo`` and a request is made to ``/rev/@`` with this argument set to
176 ``http://myserver:9000/prefix``, the advertised URL and application path
177 resolve as if the request were to ``http://myserver:9000/prefix/rev/@``. In
178 other words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
179 ``SCRIPT_NAME`` are effectively replaced by components from this URL.
170 180 """
171 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
181 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
172 182
173 183 # We first validate that the incoming object conforms with the WSGI spec.
174 184 # We only want to be dealing with spec-conforming WSGI implementations.
@@ -184,20 +194,27 b' def parserequestfromenv(env, bodyfh, rep'
184 194 env = {k: v.encode('latin-1') if isinstance(v, str) else v
185 195 for k, v in env.iteritems()}
186 196
197 if altbaseurl:
198 altbaseurl = util.url(altbaseurl)
199
187 200 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
188 201 # the environment variables.
189 202 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
190 203 # how URLs are reconstructed.
191 204 fullurl = env['wsgi.url_scheme'] + '://'
192 advertisedfullurl = fullurl
205
206 if altbaseurl and altbaseurl.scheme:
207 advertisedfullurl = altbaseurl.scheme + '://'
208 else:
209 advertisedfullurl = fullurl
193 210
194 def addport(s):
195 if env['wsgi.url_scheme'] == 'https':
196 if env['SERVER_PORT'] != '443':
197 s += ':' + env['SERVER_PORT']
211 def addport(s, port):
212 if s.startswith('https://'):
213 if port != '443':
214 s += ':' + port
198 215 else:
199 if env['SERVER_PORT'] != '80':
200 s += ':' + env['SERVER_PORT']
216 if port != '80':
217 s += ':' + port
201 218
202 219 return s
203 220
@@ -205,17 +222,39 b' def parserequestfromenv(env, bodyfh, rep'
205 222 fullurl += env['HTTP_HOST']
206 223 else:
207 224 fullurl += env['SERVER_NAME']
208 fullurl = addport(fullurl)
225 fullurl = addport(fullurl, env['SERVER_PORT'])
226
227 if altbaseurl and altbaseurl.host:
228 advertisedfullurl += altbaseurl.host
209 229
210 advertisedfullurl += env['SERVER_NAME']
211 advertisedfullurl = addport(advertisedfullurl)
230 if altbaseurl.port:
231 port = altbaseurl.port
232 elif altbaseurl.scheme == 'http' and not altbaseurl.port:
233 port = '80'
234 elif altbaseurl.scheme == 'https' and not altbaseurl.port:
235 port = '443'
236 else:
237 port = env['SERVER_PORT']
238
239 advertisedfullurl = addport(advertisedfullurl, port)
240 else:
241 advertisedfullurl += env['SERVER_NAME']
242 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
212 243
213 244 baseurl = fullurl
214 245 advertisedbaseurl = advertisedfullurl
215 246
216 247 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
217 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
218 248 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
249
250 if altbaseurl:
251 path = altbaseurl.path or ''
252 if path and not path.startswith('/'):
253 path = '/' + path
254 advertisedfullurl += util.urlreq.quote(path)
255 else:
256 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
257
219 258 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
220 259
221 260 if env.get('QUERY_STRING'):
@@ -226,7 +265,12 b' def parserequestfromenv(env, bodyfh, rep'
226 265 # that represents the repository being dispatched to. When computing
227 266 # the dispatch info, we ignore these leading path components.
228 267
229 apppath = env.get('SCRIPT_NAME', '')
268 if altbaseurl:
269 apppath = altbaseurl.path or ''
270 if apppath and not apppath.startswith('/'):
271 apppath = '/' + apppath
272 else:
273 apppath = env.get('SCRIPT_NAME', '')
230 274
231 275 if reponame:
232 276 repoprefix = '/' + reponame.strip('/')
@@ -545,7 +589,7 b' class wsgirequest(object):'
545 589 instantiate instances of this class, which provides higher-level APIs
546 590 for obtaining request parameters, writing HTTP output, etc.
547 591 """
548 def __init__(self, wsgienv, start_response):
592 def __init__(self, wsgienv, start_response, altbaseurl=None):
549 593 version = wsgienv[r'wsgi.version']
550 594 if (version < (1, 0)) or (version >= (2, 0)):
551 595 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
@@ -563,7 +607,7 b' class wsgirequest(object):'
563 607 self.multiprocess = wsgienv[r'wsgi.multiprocess']
564 608 self.run_once = wsgienv[r'wsgi.run_once']
565 609 self.env = wsgienv
566 self.req = parserequestfromenv(wsgienv, inp)
610 self.req = parserequestfromenv(wsgienv, inp, altbaseurl=altbaseurl)
567 611 self.res = wsgiresponse(self.req, start_response)
568 612 self._start_response = start_response
569 613 self.server_write = None
@@ -23,11 +23,12 b' DEFAULT_ENV = {'
23 23 r'wsgi.run_once': False,
24 24 }
25 25
26 def parse(env, bodyfh=None, reponame=None, extra=None):
26 def parse(env, bodyfh=None, reponame=None, altbaseurl=None, extra=None):
27 27 env = dict(env)
28 28 env.update(extra or {})
29 29
30 return requestmod.parserequestfromenv(env, bodyfh, reponame=reponame)
30 return requestmod.parserequestfromenv(env, bodyfh, reponame=reponame,
31 altbaseurl=altbaseurl)
31 32
32 33 class ParseRequestTests(unittest.TestCase):
33 34 def testdefault(self):
@@ -242,6 +243,174 b' class ParseRequestTests(unittest.TestCas'
242 243 self.assertEqual(r.dispatchpath, b'path1/path2')
243 244 self.assertEqual(r.reponame, b'prefix/repo')
244 245
246 def testaltbaseurl(self):
247 # Simple hostname remap.
248 r = parse(DEFAULT_ENV, altbaseurl='http://altserver')
249
250 self.assertEqual(r.url, b'http://testserver')
251 self.assertEqual(r.baseurl, b'http://testserver')
252 self.assertEqual(r.advertisedurl, b'http://altserver')
253 self.assertEqual(r.advertisedbaseurl, b'http://altserver')
254 self.assertEqual(r.urlscheme, b'http')
255 self.assertEqual(r.apppath, b'')
256 self.assertEqual(r.dispatchparts, [])
257 self.assertIsNone(r.dispatchpath)
258 self.assertIsNone(r.reponame)
259
260 # With a custom port.
261 r = parse(DEFAULT_ENV, altbaseurl='http://altserver:8000')
262 self.assertEqual(r.url, b'http://testserver')
263 self.assertEqual(r.baseurl, b'http://testserver')
264 self.assertEqual(r.advertisedurl, b'http://altserver:8000')
265 self.assertEqual(r.advertisedbaseurl, b'http://altserver:8000')
266 self.assertEqual(r.urlscheme, b'http')
267 self.assertEqual(r.apppath, b'')
268 self.assertEqual(r.dispatchparts, [])
269 self.assertIsNone(r.dispatchpath)
270 self.assertIsNone(r.reponame)
271
272 # With a changed protocol.
273 r = parse(DEFAULT_ENV, altbaseurl='https://altserver')
274 self.assertEqual(r.url, b'http://testserver')
275 self.assertEqual(r.baseurl, b'http://testserver')
276 self.assertEqual(r.advertisedurl, b'https://altserver')
277 self.assertEqual(r.advertisedbaseurl, b'https://altserver')
278 # URL scheme is defined as the actual scheme, not advertised.
279 self.assertEqual(r.urlscheme, b'http')
280 self.assertEqual(r.apppath, b'')
281 self.assertEqual(r.dispatchparts, [])
282 self.assertIsNone(r.dispatchpath)
283 self.assertIsNone(r.reponame)
284
285 # Explicit default https port (443) is omitted from advertised URLs.
286 r = parse(DEFAULT_ENV, altbaseurl='https://altserver:443')
287 self.assertEqual(r.url, b'http://testserver')
288 self.assertEqual(r.baseurl, b'http://testserver')
289 self.assertEqual(r.advertisedurl, b'https://altserver')
290 self.assertEqual(r.advertisedbaseurl, b'https://altserver')
291 self.assertEqual(r.urlscheme, b'http')
292 self.assertEqual(r.apppath, b'')
293 self.assertEqual(r.dispatchparts, [])
294 self.assertIsNone(r.dispatchpath)
295 self.assertIsNone(r.reponame)
296
297 # With only PATH_INFO defined.
298 r = parse(DEFAULT_ENV, altbaseurl='http://altserver', extra={
299 r'PATH_INFO': r'/path1/path2',
300 })
301 self.assertEqual(r.url, b'http://testserver/path1/path2')
302 self.assertEqual(r.baseurl, b'http://testserver')
303 self.assertEqual(r.advertisedurl, b'http://altserver/path1/path2')
304 self.assertEqual(r.advertisedbaseurl, b'http://altserver')
305 self.assertEqual(r.urlscheme, b'http')
306 self.assertEqual(r.apppath, b'')
307 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
308 self.assertEqual(r.dispatchpath, b'path1/path2')
309 self.assertIsNone(r.reponame)
310
311 # Path on alt URL.
312 r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath')
313 self.assertEqual(r.url, b'http://testserver')
314 self.assertEqual(r.baseurl, b'http://testserver')
315 self.assertEqual(r.advertisedurl, b'http://altserver/altpath')
316 self.assertEqual(r.advertisedbaseurl, b'http://altserver')
317 self.assertEqual(r.urlscheme, b'http')
318 self.assertEqual(r.apppath, b'/altpath')
319 self.assertEqual(r.dispatchparts, [])
320 self.assertIsNone(r.dispatchpath)
321 self.assertIsNone(r.reponame)
322
323 # With a trailing slash.
324 r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath/')
325 self.assertEqual(r.url, b'http://testserver')
326 self.assertEqual(r.baseurl, b'http://testserver')
327 self.assertEqual(r.advertisedurl, b'http://altserver/altpath/')
328 self.assertEqual(r.advertisedbaseurl, b'http://altserver')
329 self.assertEqual(r.urlscheme, b'http')
330 self.assertEqual(r.apppath, b'/altpath/')
331 self.assertEqual(r.dispatchparts, [])
332 self.assertIsNone(r.dispatchpath)
333 self.assertIsNone(r.reponame)
334
335 # PATH_INFO + path on alt URL.
336 r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath', extra={
337 r'PATH_INFO': r'/path1/path2',
338 })
339 self.assertEqual(r.url, b'http://testserver/path1/path2')
340 self.assertEqual(r.baseurl, b'http://testserver')
341 self.assertEqual(r.advertisedurl,
342 b'http://altserver/altpath/path1/path2')
343 self.assertEqual(r.advertisedbaseurl, b'http://altserver')
344 self.assertEqual(r.urlscheme, b'http')
345 self.assertEqual(r.apppath, b'/altpath')
346 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
347 self.assertEqual(r.dispatchpath, b'path1/path2')
348 self.assertIsNone(r.reponame)
349
350 # PATH_INFO + path on alt URL with trailing slash.
351 r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath/', extra={
352 r'PATH_INFO': r'/path1/path2',
353 })
354 self.assertEqual(r.url, b'http://testserver/path1/path2')
355 self.assertEqual(r.baseurl, b'http://testserver')
356 self.assertEqual(r.advertisedurl,
357 b'http://altserver/altpath//path1/path2')
358 self.assertEqual(r.advertisedbaseurl, b'http://altserver')
359 self.assertEqual(r.urlscheme, b'http')
360 self.assertEqual(r.apppath, b'/altpath/')
361 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
362 self.assertEqual(r.dispatchpath, b'path1/path2')
363 self.assertIsNone(r.reponame)
364
365 # Local SCRIPT_NAME is ignored.
366 r = parse(DEFAULT_ENV, altbaseurl='http://altserver', extra={
367 r'SCRIPT_NAME': r'/script',
368 r'PATH_INFO': r'/path1/path2',
369 })
370 self.assertEqual(r.url, b'http://testserver/script/path1/path2')
371 self.assertEqual(r.baseurl, b'http://testserver')
372 self.assertEqual(r.advertisedurl, b'http://altserver/path1/path2')
373 self.assertEqual(r.advertisedbaseurl, b'http://altserver')
374 self.assertEqual(r.urlscheme, b'http')
375 self.assertEqual(r.apppath, b'')
376 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
377 self.assertEqual(r.dispatchpath, b'path1/path2')
378 self.assertIsNone(r.reponame)
379
380 # Use remote's path for script name, app path
381 r = parse(DEFAULT_ENV, altbaseurl='http://altserver/altroot', extra={
382 r'SCRIPT_NAME': r'/script',
383 r'PATH_INFO': r'/path1/path2',
384 })
385 self.assertEqual(r.url, b'http://testserver/script/path1/path2')
386 self.assertEqual(r.baseurl, b'http://testserver')
387 self.assertEqual(r.advertisedurl,
388 b'http://altserver/altroot/path1/path2')
389 self.assertEqual(r.advertisedbaseurl, b'http://altserver')
390 self.assertEqual(r.urlscheme, b'http')
391 self.assertEqual(r.apppath, b'/altroot')
392 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
393 self.assertEqual(r.dispatchpath, b'path1/path2')
394 self.assertIsNone(r.reponame)
395
396 # reponame is factored in properly.
397 r = parse(DEFAULT_ENV, reponame=b'repo',
398 altbaseurl='http://altserver/altroot',
399 extra={
400 r'SCRIPT_NAME': r'/script',
401 r'PATH_INFO': r'/repo/path1/path2',
402 })
403
404 self.assertEqual(r.url, b'http://testserver/script/repo/path1/path2')
405 self.assertEqual(r.baseurl, b'http://testserver')
406 self.assertEqual(r.advertisedurl,
407 b'http://altserver/altroot/repo/path1/path2')
408 self.assertEqual(r.advertisedbaseurl, b'http://altserver')
409 self.assertEqual(r.apppath, b'/altroot/repo')
410 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
411 self.assertEqual(r.dispatchpath, b'path1/path2')
412 self.assertEqual(r.reponame, b'repo')
413
245 414 if __name__ == '__main__':
246 415 import silenttestrunner
247 416 silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now