##// END OF EJS Templates
hgweb: remove wsgirequest (API)...
Gregory Szorc -
r36928:f0a85154 default
parent child Browse files
Show More
@@ -1,453 +1,453
1 1 # hgweb/hgweb_mod.py - Web interface for a repository.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import contextlib
12 12 import os
13 13
14 14 from .common import (
15 15 ErrorResponse,
16 16 HTTP_BAD_REQUEST,
17 17 cspvalues,
18 18 permhooks,
19 19 statusmessage,
20 20 )
21 21
22 22 from .. import (
23 23 encoding,
24 24 error,
25 25 formatter,
26 26 hg,
27 27 hook,
28 28 profiling,
29 29 pycompat,
30 30 repoview,
31 31 templatefilters,
32 32 templater,
33 33 ui as uimod,
34 34 util,
35 35 wireprotoserver,
36 36 )
37 37
38 38 from . import (
39 39 request as requestmod,
40 40 webcommands,
41 41 webutil,
42 42 wsgicgi,
43 43 )
44 44
45 45 archivespecs = util.sortdict((
46 46 ('zip', ('application/zip', 'zip', '.zip', None)),
47 47 ('gz', ('application/x-gzip', 'tgz', '.tar.gz', None)),
48 48 ('bz2', ('application/x-bzip2', 'tbz2', '.tar.bz2', None)),
49 49 ))
50 50
51 51 def getstyle(req, configfn, templatepath):
52 52 styles = (
53 53 req.qsparams.get('style', None),
54 54 configfn('web', 'style'),
55 55 'paper',
56 56 )
57 57 return styles, templater.stylemap(styles, templatepath)
58 58
59 59 def makebreadcrumb(url, prefix=''):
60 60 '''Return a 'URL breadcrumb' list
61 61
62 62 A 'URL breadcrumb' is a list of URL-name pairs,
63 63 corresponding to each of the path items on a URL.
64 64 This can be used to create path navigation entries.
65 65 '''
66 66 if url.endswith('/'):
67 67 url = url[:-1]
68 68 if prefix:
69 69 url = '/' + prefix + url
70 70 relpath = url
71 71 if relpath.startswith('/'):
72 72 relpath = relpath[1:]
73 73
74 74 breadcrumb = []
75 75 urlel = url
76 76 pathitems = [''] + relpath.split('/')
77 77 for pathel in reversed(pathitems):
78 78 if not pathel or not urlel:
79 79 break
80 80 breadcrumb.append({'url': urlel, 'name': pathel})
81 81 urlel = os.path.dirname(urlel)
82 82 return reversed(breadcrumb)
83 83
84 84 class requestcontext(object):
85 85 """Holds state/context for an individual request.
86 86
87 87 Servers can be multi-threaded. Holding state on the WSGI application
88 88 is prone to race conditions. Instances of this class exist to hold
89 89 mutable and race-free state for requests.
90 90 """
91 91 def __init__(self, app, repo, req, res):
92 92 self.repo = repo
93 93 self.reponame = app.reponame
94 94 self.req = req
95 95 self.res = res
96 96
97 97 self.archivespecs = archivespecs
98 98
99 99 self.maxchanges = self.configint('web', 'maxchanges')
100 100 self.stripecount = self.configint('web', 'stripes')
101 101 self.maxshortchanges = self.configint('web', 'maxshortchanges')
102 102 self.maxfiles = self.configint('web', 'maxfiles')
103 103 self.allowpull = self.configbool('web', 'allow-pull')
104 104
105 105 # we use untrusted=False to prevent a repo owner from using
106 106 # web.templates in .hg/hgrc to get access to any file readable
107 107 # by the user running the CGI script
108 108 self.templatepath = self.config('web', 'templates', untrusted=False)
109 109
110 110 # This object is more expensive to build than simple config values.
111 111 # It is shared across requests. The app will replace the object
112 112 # if it is updated. Since this is a reference and nothing should
113 113 # modify the underlying object, it should be constant for the lifetime
114 114 # of the request.
115 115 self.websubtable = app.websubtable
116 116
117 117 self.csp, self.nonce = cspvalues(self.repo.ui)
118 118
119 119 # Trust the settings from the .hg/hgrc files by default.
120 120 def config(self, section, name, default=uimod._unset, untrusted=True):
121 121 return self.repo.ui.config(section, name, default,
122 122 untrusted=untrusted)
123 123
124 124 def configbool(self, section, name, default=uimod._unset, untrusted=True):
125 125 return self.repo.ui.configbool(section, name, default,
126 126 untrusted=untrusted)
127 127
128 128 def configint(self, section, name, default=uimod._unset, untrusted=True):
129 129 return self.repo.ui.configint(section, name, default,
130 130 untrusted=untrusted)
131 131
132 132 def configlist(self, section, name, default=uimod._unset, untrusted=True):
133 133 return self.repo.ui.configlist(section, name, default,
134 134 untrusted=untrusted)
135 135
136 136 def archivelist(self, nodeid):
137 137 allowed = self.configlist('web', 'allow_archive')
138 138 for typ, spec in self.archivespecs.iteritems():
139 139 if typ in allowed or self.configbool('web', 'allow%s' % typ):
140 140 yield {'type': typ, 'extension': spec[2], 'node': nodeid}
141 141
142 142 def templater(self, req):
143 143 # determine scheme, port and server name
144 144 # this is needed to create absolute urls
145 145 logourl = self.config('web', 'logourl')
146 146 logoimg = self.config('web', 'logoimg')
147 147 staticurl = (self.config('web', 'staticurl')
148 148 or req.apppath + '/static/')
149 149 if not staticurl.endswith('/'):
150 150 staticurl += '/'
151 151
152 152 # some functions for the templater
153 153
154 154 def motd(**map):
155 155 yield self.config('web', 'motd')
156 156
157 157 # figure out which style to use
158 158
159 159 vars = {}
160 160 styles, (style, mapfile) = getstyle(req, self.config,
161 161 self.templatepath)
162 162 if style == styles[0]:
163 163 vars['style'] = style
164 164
165 165 sessionvars = webutil.sessionvars(vars, '?')
166 166
167 167 if not self.reponame:
168 168 self.reponame = (self.config('web', 'name', '')
169 169 or req.reponame
170 170 or req.apppath
171 171 or self.repo.root)
172 172
173 173 def websubfilter(text):
174 174 return templatefilters.websub(text, self.websubtable)
175 175
176 176 # create the templater
177 177 # TODO: export all keywords: defaults = templatekw.keywords.copy()
178 178 defaults = {
179 179 'url': req.apppath + '/',
180 180 'logourl': logourl,
181 181 'logoimg': logoimg,
182 182 'staticurl': staticurl,
183 183 'urlbase': req.advertisedbaseurl,
184 184 'repo': self.reponame,
185 185 'encoding': encoding.encoding,
186 186 'motd': motd,
187 187 'sessionvars': sessionvars,
188 188 'pathdef': makebreadcrumb(req.apppath),
189 189 'style': style,
190 190 'nonce': self.nonce,
191 191 }
192 192 tres = formatter.templateresources(self.repo.ui, self.repo)
193 193 tmpl = templater.templater.frommapfile(mapfile,
194 194 filters={'websub': websubfilter},
195 195 defaults=defaults,
196 196 resources=tres)
197 197 return tmpl
198 198
199 199 def sendtemplate(self, name, **kwargs):
200 200 """Helper function to send a response generated from a template."""
201 201 self.res.setbodygen(self.tmpl(name, **kwargs))
202 202 return self.res.sendresponse()
203 203
204 204 class hgweb(object):
205 205 """HTTP server for individual repositories.
206 206
207 207 Instances of this class serve HTTP responses for a particular
208 208 repository.
209 209
210 210 Instances are typically used as WSGI applications.
211 211
212 212 Some servers are multi-threaded. On these servers, there may
213 213 be multiple active threads inside __call__.
214 214 """
215 215 def __init__(self, repo, name=None, baseui=None):
216 216 if isinstance(repo, str):
217 217 if baseui:
218 218 u = baseui.copy()
219 219 else:
220 220 u = uimod.ui.load()
221 221 r = hg.repository(u, repo)
222 222 else:
223 223 # we trust caller to give us a private copy
224 224 r = repo
225 225
226 226 r.ui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
227 227 r.baseui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
228 228 r.ui.setconfig('ui', 'nontty', 'true', 'hgweb')
229 229 r.baseui.setconfig('ui', 'nontty', 'true', 'hgweb')
230 230 # resolve file patterns relative to repo root
231 231 r.ui.setconfig('ui', 'forcecwd', r.root, 'hgweb')
232 232 r.baseui.setconfig('ui', 'forcecwd', r.root, 'hgweb')
233 233 # displaying bundling progress bar while serving feels wrong and may
234 234 # break some wsgi implementations.
235 235 r.ui.setconfig('progress', 'disable', 'true', 'hgweb')
236 236 r.baseui.setconfig('progress', 'disable', 'true', 'hgweb')
237 237 self._repos = [hg.cachedlocalrepo(self._webifyrepo(r))]
238 238 self._lastrepo = self._repos[0]
239 239 hook.redirect(True)
240 240 self.reponame = name
241 241
242 242 def _webifyrepo(self, repo):
243 243 repo = getwebview(repo)
244 244 self.websubtable = webutil.getwebsubs(repo)
245 245 return repo
246 246
247 247 @contextlib.contextmanager
248 248 def _obtainrepo(self):
249 249 """Obtain a repo unique to the caller.
250 250
251 251 Internally we maintain a stack of cachedlocalrepo instances
252 252 to be handed out. If one is available, we pop it and return it,
253 253 ensuring it is up to date in the process. If one is not available,
254 254 we clone the most recently used repo instance and return it.
255 255
256 256 It is currently possible for the stack to grow without bounds
257 257 if the server allows infinite threads. However, servers should
258 258 have a thread limit, thus establishing our limit.
259 259 """
260 260 if self._repos:
261 261 cached = self._repos.pop()
262 262 r, created = cached.fetch()
263 263 else:
264 264 cached = self._lastrepo.copy()
265 265 r, created = cached.fetch()
266 266 if created:
267 267 r = self._webifyrepo(r)
268 268
269 269 self._lastrepo = cached
270 270 self.mtime = cached.mtime
271 271 try:
272 272 yield r
273 273 finally:
274 274 self._repos.append(cached)
275 275
276 276 def run(self):
277 277 """Start a server from CGI environment.
278 278
279 279 Modern servers should be using WSGI and should avoid this
280 280 method, if possible.
281 281 """
282 282 if not encoding.environ.get('GATEWAY_INTERFACE',
283 283 '').startswith("CGI/1."):
284 284 raise RuntimeError("This function is only intended to be "
285 285 "called while running as a CGI script.")
286 286 wsgicgi.launch(self)
287 287
288 288 def __call__(self, env, respond):
289 289 """Run the WSGI application.
290 290
291 291 This may be called by multiple threads.
292 292 """
293 req = requestmod.wsgirequest(env, respond)
294 return self.run_wsgi(req)
293 req = requestmod.parserequestfromenv(env)
294 res = requestmod.wsgiresponse(req, respond)
295 295
296 def run_wsgi(self, wsgireq):
296 return self.run_wsgi(req, res)
297
298 def run_wsgi(self, req, res):
297 299 """Internal method to run the WSGI application.
298 300
299 301 This is typically only called by Mercurial. External consumers
300 302 should be using instances of this class as the WSGI application.
301 303 """
302 304 with self._obtainrepo() as repo:
303 305 profile = repo.ui.configbool('profiling', 'enabled')
304 306 with profiling.profile(repo.ui, enabled=profile):
305 for r in self._runwsgi(wsgireq, repo):
307 for r in self._runwsgi(req, res, repo):
306 308 yield r
307 309
308 def _runwsgi(self, wsgireq, repo):
309 req = wsgireq.req
310 res = wsgireq.res
310 def _runwsgi(self, req, res, repo):
311 311 rctx = requestcontext(self, repo, req, res)
312 312
313 313 # This state is global across all threads.
314 314 encoding.encoding = rctx.config('web', 'encoding')
315 315 rctx.repo.ui.environ = req.rawenv
316 316
317 317 if rctx.csp:
318 318 # hgwebdir may have added CSP header. Since we generate our own,
319 319 # replace it.
320 320 res.headers['Content-Security-Policy'] = rctx.csp
321 321
322 322 handled = wireprotoserver.handlewsgirequest(
323 323 rctx, req, res, self.check_perm)
324 324 if handled:
325 325 return res.sendresponse()
326 326
327 327 # Old implementations of hgweb supported dispatching the request via
328 328 # the initial query string parameter instead of using PATH_INFO.
329 329 # If PATH_INFO is present (signaled by ``req.dispatchpath`` having
330 330 # a value), we use it. Otherwise fall back to the query string.
331 331 if req.dispatchpath is not None:
332 332 query = req.dispatchpath
333 333 else:
334 334 query = req.querystring.partition('&')[0].partition(';')[0]
335 335
336 336 # translate user-visible url structure to internal structure
337 337
338 338 args = query.split('/', 2)
339 339 if 'cmd' not in req.qsparams and args and args[0]:
340 340 cmd = args.pop(0)
341 341 style = cmd.rfind('-')
342 342 if style != -1:
343 343 req.qsparams['style'] = cmd[:style]
344 344 cmd = cmd[style + 1:]
345 345
346 346 # avoid accepting e.g. style parameter as command
347 347 if util.safehasattr(webcommands, cmd):
348 348 req.qsparams['cmd'] = cmd
349 349
350 350 if cmd == 'static':
351 351 req.qsparams['file'] = '/'.join(args)
352 352 else:
353 353 if args and args[0]:
354 354 node = args.pop(0).replace('%2F', '/')
355 355 req.qsparams['node'] = node
356 356 if args:
357 357 if 'file' in req.qsparams:
358 358 del req.qsparams['file']
359 359 for a in args:
360 360 req.qsparams.add('file', a)
361 361
362 362 ua = req.headers.get('User-Agent', '')
363 363 if cmd == 'rev' and 'mercurial' in ua:
364 364 req.qsparams['style'] = 'raw'
365 365
366 366 if cmd == 'archive':
367 367 fn = req.qsparams['node']
368 368 for type_, spec in rctx.archivespecs.iteritems():
369 369 ext = spec[2]
370 370 if fn.endswith(ext):
371 371 req.qsparams['node'] = fn[:-len(ext)]
372 372 req.qsparams['type'] = type_
373 373 else:
374 374 cmd = req.qsparams.get('cmd', '')
375 375
376 376 # process the web interface request
377 377
378 378 try:
379 379 rctx.tmpl = rctx.templater(req)
380 380 ctype = rctx.tmpl('mimetype', encoding=encoding.encoding)
381 381 ctype = templater.stringify(ctype)
382 382
383 383 # check read permissions non-static content
384 384 if cmd != 'static':
385 385 self.check_perm(rctx, req, None)
386 386
387 387 if cmd == '':
388 388 req.qsparams['cmd'] = rctx.tmpl.cache['default']
389 389 cmd = req.qsparams['cmd']
390 390
391 391 # Don't enable caching if using a CSP nonce because then it wouldn't
392 392 # be a nonce.
393 393 if rctx.configbool('web', 'cache') and not rctx.nonce:
394 394 tag = 'W/"%d"' % self.mtime
395 395 if req.headers.get('If-None-Match') == tag:
396 396 res.status = '304 Not Modified'
397 397 # Response body not allowed on 304.
398 398 res.setbodybytes('')
399 399 return res.sendresponse()
400 400
401 401 res.headers['ETag'] = tag
402 402
403 403 if cmd not in webcommands.__all__:
404 404 msg = 'no such method: %s' % cmd
405 405 raise ErrorResponse(HTTP_BAD_REQUEST, msg)
406 406 else:
407 407 # Set some globals appropriate for web handlers. Commands can
408 408 # override easily enough.
409 409 res.status = '200 Script output follows'
410 410 res.headers['Content-Type'] = ctype
411 411 return getattr(webcommands, cmd)(rctx)
412 412
413 413 except (error.LookupError, error.RepoLookupError) as err:
414 414 msg = pycompat.bytestr(err)
415 415 if (util.safehasattr(err, 'name') and
416 416 not isinstance(err, error.ManifestLookupError)):
417 417 msg = 'revision not found: %s' % err.name
418 418
419 419 res.status = '404 Not Found'
420 420 res.headers['Content-Type'] = ctype
421 421 return rctx.sendtemplate('error', error=msg)
422 422 except (error.RepoError, error.RevlogError) as e:
423 423 res.status = '500 Internal Server Error'
424 424 res.headers['Content-Type'] = ctype
425 425 return rctx.sendtemplate('error', error=pycompat.bytestr(e))
426 426 except ErrorResponse as e:
427 427 res.status = statusmessage(e.code, pycompat.bytestr(e))
428 428 res.headers['Content-Type'] = ctype
429 429 return rctx.sendtemplate('error', error=pycompat.bytestr(e))
430 430
431 431 def check_perm(self, rctx, req, op):
432 432 for permhook in permhooks:
433 433 permhook(rctx, req, op)
434 434
435 435 def getwebview(repo):
436 436 """The 'web.view' config controls changeset filter to hgweb. Possible
437 437 values are ``served``, ``visible`` and ``all``. Default is ``served``.
438 438 The ``served`` filter only shows changesets that can be pulled from the
439 439 hgweb instance. The ``visible`` filter includes secret changesets but
440 440 still excludes "hidden" ones.
441 441
442 442 See the repoview module for details.
443 443
444 444 The option has been around undocumented since Mercurial 2.5, but no
445 445 user ever asked about it. So we better keep it undocumented for now."""
446 446 # experimental config: web.view
447 447 viewconfig = repo.ui.config('web', 'view', untrusted=True)
448 448 if viewconfig == 'all':
449 449 return repo.unfiltered()
450 450 elif viewconfig in repoview.filtertable:
451 451 return repo.filtered(viewconfig)
452 452 else:
453 453 return repo.filtered('served')
@@ -1,527 +1,526
1 1 # hgweb/hgwebdir_mod.py - Web interface for a directory of repositories.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import os
12 12 import time
13 13
14 14 from ..i18n import _
15 15
16 16 from .common import (
17 17 ErrorResponse,
18 18 HTTP_SERVER_ERROR,
19 19 cspvalues,
20 20 get_contact,
21 21 get_mtime,
22 22 ismember,
23 23 paritygen,
24 24 staticfile,
25 25 statusmessage,
26 26 )
27 27
28 28 from .. import (
29 29 configitems,
30 30 encoding,
31 31 error,
32 32 hg,
33 33 profiling,
34 34 pycompat,
35 35 scmutil,
36 36 templater,
37 37 ui as uimod,
38 38 util,
39 39 )
40 40
41 41 from . import (
42 42 hgweb_mod,
43 43 request as requestmod,
44 44 webutil,
45 45 wsgicgi,
46 46 )
47 47 from ..utils import dateutil
48 48
49 49 def cleannames(items):
50 50 return [(util.pconvert(name).strip('/'), path) for name, path in items]
51 51
52 52 def findrepos(paths):
53 53 repos = []
54 54 for prefix, root in cleannames(paths):
55 55 roothead, roottail = os.path.split(root)
56 56 # "foo = /bar/*" or "foo = /bar/**" lets every repo /bar/N in or below
57 57 # /bar/ be served as foo/N.
58 58 # '*' will not search inside dirs with .hg (except .hg/patches),
59 59 # '**' will search inside dirs with .hg (and thus also find subrepos).
60 60 try:
61 61 recurse = {'*': False, '**': True}[roottail]
62 62 except KeyError:
63 63 repos.append((prefix, root))
64 64 continue
65 65 roothead = os.path.normpath(os.path.abspath(roothead))
66 66 paths = scmutil.walkrepos(roothead, followsym=True, recurse=recurse)
67 67 repos.extend(urlrepos(prefix, roothead, paths))
68 68 return repos
69 69
70 70 def urlrepos(prefix, roothead, paths):
71 71 """yield url paths and filesystem paths from a list of repo paths
72 72
73 73 >>> conv = lambda seq: [(v, util.pconvert(p)) for v,p in seq]
74 74 >>> conv(urlrepos(b'hg', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
75 75 [('hg/r', '/opt/r'), ('hg/r/r', '/opt/r/r'), ('hg', '/opt')]
76 76 >>> conv(urlrepos(b'', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
77 77 [('r', '/opt/r'), ('r/r', '/opt/r/r'), ('', '/opt')]
78 78 """
79 79 for path in paths:
80 80 path = os.path.normpath(path)
81 81 yield (prefix + '/' +
82 82 util.pconvert(path[len(roothead):]).lstrip('/')).strip('/'), path
83 83
84 84 def readallowed(ui, req):
85 85 """Check allow_read and deny_read config options of a repo's ui object
86 86 to determine user permissions. By default, with neither option set (or
87 87 both empty), allow all users to read the repo. There are two ways a
88 88 user can be denied read access: (1) deny_read is not empty, and the
89 89 user is unauthenticated or deny_read contains user (or *), and (2)
90 90 allow_read is not empty and the user is not in allow_read. Return True
91 91 if user is allowed to read the repo, else return False."""
92 92
93 93 user = req.remoteuser
94 94
95 95 deny_read = ui.configlist('web', 'deny_read', untrusted=True)
96 96 if deny_read and (not user or ismember(ui, user, deny_read)):
97 97 return False
98 98
99 99 allow_read = ui.configlist('web', 'allow_read', untrusted=True)
100 100 # by default, allow reading if no allow_read option has been set
101 101 if not allow_read or ismember(ui, user, allow_read):
102 102 return True
103 103
104 104 return False
105 105
106 106 def archivelist(ui, nodeid, url):
107 107 allowed = ui.configlist('web', 'allow_archive', untrusted=True)
108 108 archives = []
109 109
110 110 for typ, spec in hgweb_mod.archivespecs.iteritems():
111 111 if typ in allowed or ui.configbool('web', 'allow' + typ,
112 112 untrusted=True):
113 113 archives.append({
114 114 'type': typ,
115 115 'extension': spec[2],
116 116 'node': nodeid,
117 117 'url': url,
118 118 })
119 119
120 120 return archives
121 121
122 122 def rawindexentries(ui, repos, req, subdir=''):
123 123 descend = ui.configbool('web', 'descend')
124 124 collapse = ui.configbool('web', 'collapse')
125 125 seenrepos = set()
126 126 seendirs = set()
127 127 for name, path in repos:
128 128
129 129 if not name.startswith(subdir):
130 130 continue
131 131 name = name[len(subdir):]
132 132 directory = False
133 133
134 134 if '/' in name:
135 135 if not descend:
136 136 continue
137 137
138 138 nameparts = name.split('/')
139 139 rootname = nameparts[0]
140 140
141 141 if not collapse:
142 142 pass
143 143 elif rootname in seendirs:
144 144 continue
145 145 elif rootname in seenrepos:
146 146 pass
147 147 else:
148 148 directory = True
149 149 name = rootname
150 150
151 151 # redefine the path to refer to the directory
152 152 discarded = '/'.join(nameparts[1:])
153 153
154 154 # remove name parts plus accompanying slash
155 155 path = path[:-len(discarded) - 1]
156 156
157 157 try:
158 158 r = hg.repository(ui, path)
159 159 directory = False
160 160 except (IOError, error.RepoError):
161 161 pass
162 162
163 163 parts = [
164 164 req.apppath.strip('/'),
165 165 subdir.strip('/'),
166 166 name.strip('/'),
167 167 ]
168 168 url = '/' + '/'.join(p for p in parts if p) + '/'
169 169
170 170 # show either a directory entry or a repository
171 171 if directory:
172 172 # get the directory's time information
173 173 try:
174 174 d = (get_mtime(path), dateutil.makedate()[1])
175 175 except OSError:
176 176 continue
177 177
178 178 # add '/' to the name to make it obvious that
179 179 # the entry is a directory, not a regular repository
180 180 row = {'contact': "",
181 181 'contact_sort': "",
182 182 'name': name + '/',
183 183 'name_sort': name,
184 184 'url': url,
185 185 'description': "",
186 186 'description_sort': "",
187 187 'lastchange': d,
188 188 'lastchange_sort': d[1] - d[0],
189 189 'archives': [],
190 190 'isdirectory': True,
191 191 'labels': [],
192 192 }
193 193
194 194 seendirs.add(name)
195 195 yield row
196 196 continue
197 197
198 198 u = ui.copy()
199 199 try:
200 200 u.readconfig(os.path.join(path, '.hg', 'hgrc'))
201 201 except Exception as e:
202 202 u.warn(_('error reading %s/.hg/hgrc: %s\n') % (path, e))
203 203 continue
204 204
205 205 def get(section, name, default=uimod._unset):
206 206 return u.config(section, name, default, untrusted=True)
207 207
208 208 if u.configbool("web", "hidden", untrusted=True):
209 209 continue
210 210
211 211 if not readallowed(u, req):
212 212 continue
213 213
214 214 # update time with local timezone
215 215 try:
216 216 r = hg.repository(ui, path)
217 217 except IOError:
218 218 u.warn(_('error accessing repository at %s\n') % path)
219 219 continue
220 220 except error.RepoError:
221 221 u.warn(_('error accessing repository at %s\n') % path)
222 222 continue
223 223 try:
224 224 d = (get_mtime(r.spath), dateutil.makedate()[1])
225 225 except OSError:
226 226 continue
227 227
228 228 contact = get_contact(get)
229 229 description = get("web", "description")
230 230 seenrepos.add(name)
231 231 name = get("web", "name", name)
232 232 row = {'contact': contact or "unknown",
233 233 'contact_sort': contact.upper() or "unknown",
234 234 'name': name,
235 235 'name_sort': name,
236 236 'url': url,
237 237 'description': description or "unknown",
238 238 'description_sort': description.upper() or "unknown",
239 239 'lastchange': d,
240 240 'lastchange_sort': d[1] - d[0],
241 241 'archives': archivelist(u, "tip", url),
242 242 'isdirectory': None,
243 243 'labels': u.configlist('web', 'labels', untrusted=True),
244 244 }
245 245
246 246 yield row
247 247
248 248 def indexentries(ui, repos, req, stripecount, sortcolumn='',
249 249 descending=False, subdir=''):
250 250
251 251 rows = rawindexentries(ui, repos, req, subdir=subdir)
252 252
253 253 sortdefault = None, False
254 254
255 255 if sortcolumn and sortdefault != (sortcolumn, descending):
256 256 sortkey = '%s_sort' % sortcolumn
257 257 rows = sorted(rows, key=lambda x: x[sortkey],
258 258 reverse=descending)
259 259
260 260 for row, parity in zip(rows, paritygen(stripecount)):
261 261 row['parity'] = parity
262 262 yield row
263 263
264 264 class hgwebdir(object):
265 265 """HTTP server for multiple repositories.
266 266
267 267 Given a configuration, different repositories will be served depending
268 268 on the request path.
269 269
270 270 Instances are typically used as WSGI applications.
271 271 """
272 272 def __init__(self, conf, baseui=None):
273 273 self.conf = conf
274 274 self.baseui = baseui
275 275 self.ui = None
276 276 self.lastrefresh = 0
277 277 self.motd = None
278 278 self.refresh()
279 279
280 280 def refresh(self):
281 281 if self.ui:
282 282 refreshinterval = self.ui.configint('web', 'refreshinterval')
283 283 else:
284 284 item = configitems.coreitems['web']['refreshinterval']
285 285 refreshinterval = item.default
286 286
287 287 # refreshinterval <= 0 means to always refresh.
288 288 if (refreshinterval > 0 and
289 289 self.lastrefresh + refreshinterval > time.time()):
290 290 return
291 291
292 292 if self.baseui:
293 293 u = self.baseui.copy()
294 294 else:
295 295 u = uimod.ui.load()
296 296 u.setconfig('ui', 'report_untrusted', 'off', 'hgwebdir')
297 297 u.setconfig('ui', 'nontty', 'true', 'hgwebdir')
298 298 # displaying bundling progress bar while serving feels wrong and may
299 299 # break some wsgi implementations.
300 300 u.setconfig('progress', 'disable', 'true', 'hgweb')
301 301
302 302 if not isinstance(self.conf, (dict, list, tuple)):
303 303 map = {'paths': 'hgweb-paths'}
304 304 if not os.path.exists(self.conf):
305 305 raise error.Abort(_('config file %s not found!') % self.conf)
306 306 u.readconfig(self.conf, remap=map, trust=True)
307 307 paths = []
308 308 for name, ignored in u.configitems('hgweb-paths'):
309 309 for path in u.configlist('hgweb-paths', name):
310 310 paths.append((name, path))
311 311 elif isinstance(self.conf, (list, tuple)):
312 312 paths = self.conf
313 313 elif isinstance(self.conf, dict):
314 314 paths = self.conf.items()
315 315
316 316 repos = findrepos(paths)
317 317 for prefix, root in u.configitems('collections'):
318 318 prefix = util.pconvert(prefix)
319 319 for path in scmutil.walkrepos(root, followsym=True):
320 320 repo = os.path.normpath(path)
321 321 name = util.pconvert(repo)
322 322 if name.startswith(prefix):
323 323 name = name[len(prefix):]
324 324 repos.append((name.lstrip('/'), repo))
325 325
326 326 self.repos = repos
327 327 self.ui = u
328 328 encoding.encoding = self.ui.config('web', 'encoding')
329 329 self.style = self.ui.config('web', 'style')
330 330 self.templatepath = self.ui.config('web', 'templates', untrusted=False)
331 331 self.stripecount = self.ui.config('web', 'stripes')
332 332 if self.stripecount:
333 333 self.stripecount = int(self.stripecount)
334 334 prefix = self.ui.config('web', 'prefix')
335 335 if prefix.startswith('/'):
336 336 prefix = prefix[1:]
337 337 if prefix.endswith('/'):
338 338 prefix = prefix[:-1]
339 339 self.prefix = prefix
340 340 self.lastrefresh = time.time()
341 341
342 342 def run(self):
343 343 if not encoding.environ.get('GATEWAY_INTERFACE',
344 344 '').startswith("CGI/1."):
345 345 raise RuntimeError("This function is only intended to be "
346 346 "called while running as a CGI script.")
347 347 wsgicgi.launch(self)
348 348
349 349 def __call__(self, env, respond):
350 350 baseurl = self.ui.config('web', 'baseurl')
351 wsgireq = requestmod.wsgirequest(env, respond, altbaseurl=baseurl)
352 return self.run_wsgi(wsgireq)
351 req = requestmod.parserequestfromenv(env, altbaseurl=baseurl)
352 res = requestmod.wsgiresponse(req, respond)
353 353
354 def run_wsgi(self, wsgireq):
354 return self.run_wsgi(req, res)
355
356 def run_wsgi(self, req, res):
355 357 profile = self.ui.configbool('profiling', 'enabled')
356 358 with profiling.profile(self.ui, enabled=profile):
357 for r in self._runwsgi(wsgireq):
359 for r in self._runwsgi(req, res):
358 360 yield r
359 361
360 def _runwsgi(self, wsgireq):
361 req = wsgireq.req
362 res = wsgireq.res
363
362 def _runwsgi(self, req, res):
364 363 try:
365 364 self.refresh()
366 365
367 366 csp, nonce = cspvalues(self.ui)
368 367 if csp:
369 368 res.headers['Content-Security-Policy'] = csp
370 369
371 370 virtual = req.dispatchpath.strip('/')
372 371 tmpl = self.templater(req, nonce)
373 372 ctype = tmpl('mimetype', encoding=encoding.encoding)
374 373 ctype = templater.stringify(ctype)
375 374
376 375 # Global defaults. These can be overridden by any handler.
377 376 res.status = '200 Script output follows'
378 377 res.headers['Content-Type'] = ctype
379 378
380 379 # a static file
381 380 if virtual.startswith('static/') or 'static' in req.qsparams:
382 381 if virtual.startswith('static/'):
383 382 fname = virtual[7:]
384 383 else:
385 384 fname = req.qsparams['static']
386 385 static = self.ui.config("web", "static", None,
387 386 untrusted=False)
388 387 if not static:
389 388 tp = self.templatepath or templater.templatepaths()
390 389 if isinstance(tp, str):
391 390 tp = [tp]
392 391 static = [os.path.join(p, 'static') for p in tp]
393 392
394 393 staticfile(static, fname, res)
395 394 return res.sendresponse()
396 395
397 396 # top-level index
398 397
399 398 repos = dict(self.repos)
400 399
401 400 if (not virtual or virtual == 'index') and virtual not in repos:
402 401 return self.makeindex(req, res, tmpl)
403 402
404 403 # nested indexes and hgwebs
405 404
406 405 if virtual.endswith('/index') and virtual not in repos:
407 406 subdir = virtual[:-len('index')]
408 407 if any(r.startswith(subdir) for r in repos):
409 408 return self.makeindex(req, res, tmpl, subdir)
410 409
411 410 def _virtualdirs():
412 411 # Check the full virtual path, each parent, and the root ('')
413 412 if virtual != '':
414 413 yield virtual
415 414
416 415 for p in util.finddirs(virtual):
417 416 yield p
418 417
419 418 yield ''
420 419
421 420 for virtualrepo in _virtualdirs():
422 421 real = repos.get(virtualrepo)
423 422 if real:
424 423 # Re-parse the WSGI environment to take into account our
425 424 # repository path component.
426 wsgireq.req = requestmod.parserequestfromenv(
427 wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo,
425 req = requestmod.parserequestfromenv(
426 req.rawenv, reponame=virtualrepo,
428 427 altbaseurl=self.ui.config('web', 'baseurl'))
429 428 try:
430 429 # ensure caller gets private copy of ui
431 430 repo = hg.repository(self.ui.copy(), real)
432 return hgweb_mod.hgweb(repo).run_wsgi(wsgireq)
431 return hgweb_mod.hgweb(repo).run_wsgi(req, res)
433 432 except IOError as inst:
434 433 msg = encoding.strtolocal(inst.strerror)
435 434 raise ErrorResponse(HTTP_SERVER_ERROR, msg)
436 435 except error.RepoError as inst:
437 436 raise ErrorResponse(HTTP_SERVER_ERROR, bytes(inst))
438 437
439 438 # browse subdirectories
440 439 subdir = virtual + '/'
441 440 if [r for r in repos if r.startswith(subdir)]:
442 441 return self.makeindex(req, res, tmpl, subdir)
443 442
444 443 # prefixes not found
445 444 res.status = '404 Not Found'
446 445 res.setbodygen(tmpl('notfound', repo=virtual))
447 446 return res.sendresponse()
448 447
449 448 except ErrorResponse as e:
450 449 res.status = statusmessage(e.code, pycompat.bytestr(e))
451 450 res.setbodygen(tmpl('error', error=e.message or ''))
452 451 return res.sendresponse()
453 452 finally:
454 453 tmpl = None
455 454
456 455 def makeindex(self, req, res, tmpl, subdir=""):
457 456 self.refresh()
458 457 sortable = ["name", "description", "contact", "lastchange"]
459 458 sortcolumn, descending = None, False
460 459 if 'sort' in req.qsparams:
461 460 sortcolumn = req.qsparams['sort']
462 461 descending = sortcolumn.startswith('-')
463 462 if descending:
464 463 sortcolumn = sortcolumn[1:]
465 464 if sortcolumn not in sortable:
466 465 sortcolumn = ""
467 466
468 467 sort = [("sort_%s" % column,
469 468 "%s%s" % ((not descending and column == sortcolumn)
470 469 and "-" or "", column))
471 470 for column in sortable]
472 471
473 472 self.refresh()
474 473
475 474 entries = indexentries(self.ui, self.repos, req,
476 475 self.stripecount, sortcolumn=sortcolumn,
477 476 descending=descending, subdir=subdir)
478 477
479 478 res.setbodygen(tmpl(
480 479 'index',
481 480 entries=entries,
482 481 subdir=subdir,
483 482 pathdef=hgweb_mod.makebreadcrumb('/' + subdir, self.prefix),
484 483 sortcolumn=sortcolumn,
485 484 descending=descending,
486 485 **dict(sort)))
487 486
488 487 return res.sendresponse()
489 488
490 489 def templater(self, req, nonce):
491 490
492 491 def motd(**map):
493 492 if self.motd is not None:
494 493 yield self.motd
495 494 else:
496 495 yield config('web', 'motd')
497 496
498 497 def config(section, name, default=uimod._unset, untrusted=True):
499 498 return self.ui.config(section, name, default, untrusted)
500 499
501 500 vars = {}
502 501 styles, (style, mapfile) = hgweb_mod.getstyle(req, config,
503 502 self.templatepath)
504 503 if style == styles[0]:
505 504 vars['style'] = style
506 505
507 506 sessionvars = webutil.sessionvars(vars, r'?')
508 507 logourl = config('web', 'logourl')
509 508 logoimg = config('web', 'logoimg')
510 509 staticurl = (config('web', 'staticurl')
511 510 or req.apppath + '/static/')
512 511 if not staticurl.endswith('/'):
513 512 staticurl += '/'
514 513
515 514 defaults = {
516 515 "encoding": encoding.encoding,
517 516 "motd": motd,
518 517 "url": req.apppath + '/',
519 518 "logourl": logourl,
520 519 "logoimg": logoimg,
521 520 "staticurl": staticurl,
522 521 "sessionvars": sessionvars,
523 522 "style": style,
524 523 "nonce": nonce,
525 524 }
526 525 tmpl = templater.templater.frommapfile(mapfile, defaults=defaults)
527 526 return tmpl
@@ -1,615 +1,585
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import wsgiref.headers as wsgiheaders
12 12 #import wsgiref.validate
13 13
14 14 from ..thirdparty import (
15 15 attr,
16 16 )
17 17 from .. import (
18 18 error,
19 19 pycompat,
20 20 util,
21 21 )
22 22
class multidict(object):
    """Dict-like container that may hold several values under one key.

    Used to store parsed request parameters.

    Modeled after the WebOb class of the same name.
    """
    def __init__(self):
        # A flat list of (key, value) 2-tuples in insertion order. Lookups
        # are linear scans; requests carry few parameters, so this is not
        # expected to matter. A dict index could be added later if needed.
        self._items = []

    def __getitem__(self, key):
        """Return the most recently added value for ``key``.

        Raises KeyError if the key has no values.
        """
        matches = self.getall(key)
        if not matches:
            raise KeyError(key)

        return matches[-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        self._items[:] = [pair for pair in self._items if pair[0] != key]
        self._items.append((key, value))

    def __delitem__(self, key):
        """Remove every value for a key.

        Raises KeyError if the key has no values.
        """
        remaining = [(k, v) for k, v in self._items if k != key]

        if len(remaining) == len(self._items):
            raise KeyError(key)

        self._items[:] = remaining

    def __contains__(self, key):
        for k, _v in self._items:
            if k == key:
                return True

        return False

    def __len__(self):
        # Counts stored (key, value) pairs, not distinct keys.
        return len(self._items)

    def get(self, key, default=None):
        """Return the last value for ``key`` or ``default`` if absent."""
        try:
            return self[key]
        except KeyError:
            return default

    def add(self, key, value):
        """Append a value for a key, keeping any existing values."""
        self._items.append((key, value))

    def getall(self, key):
        """Return a list of every value stored for a key, in order."""
        found = []
        for k, v in self._items:
            if k == key:
                found.append(v)

        return found

    def getone(self, key):
        """Return the only value for a key.

        Raises KeyError if the key is missing or has more than one value.
        """
        vals = self.getall(key)

        if len(vals) == 1:
            return vals[0]

        if not vals:
            raise KeyError(key)

        raise KeyError('multiple values for %r' % key)

    def asdictoflists(self):
        """Return a plain dict mapping each key to the list of its values."""
        d = {}
        for k, v in self._items:
            d.setdefault(k, []).append(v)

        return d
106 106
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request.

    Contains both parsed parameters as well as a handle on the input stream.

    The class is declared with ``frozen=True``. Each field below is a plain
    ``attr.ib()`` with no default.
    """

    # Request method.
    method = attr.ib()
    # Full URL for this request.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # Value of REMOTE_USER, if set, or None.
    remoteuser = attr.ib()
    # Value of REMOTE_HOST, if set, or None.
    remotehost = attr.ib()
    # Relative WSGI application path. If defined, will begin with a
    # ``/``.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch. Can be
    # ``None`` to signal no path component given to the request, an
    # empty string to signal a request to the application's root URL,
    # or a string not beginning with ``/`` containing the requested
    # path under the application.
    dispatchpath = attr.ib()
    # The name of the repository being accessed.
    reponame = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # multidict of query string parameters.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance. Operates like a dict with case
    # insensitive keys.
    headers = attr.ib()
    # Request body input stream.
    bodyfh = attr.ib()
    # WSGI environment dict, unmodified.
    rawenv = attr.ib()
154 154
155 def parserequestfromenv(env, bodyfh, reponame=None, altbaseurl=None):
155 def parserequestfromenv(env, reponame=None, altbaseurl=None):
156 156 """Parse URL components from environment variables.
157 157
158 158 WSGI defines request attributes via environment variables. This function
159 159 parses the environment variables into a data structure.
160 160
161 161 If ``reponame`` is defined, the leading path components matching that
162 162 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
163 163 This simulates the world view of a WSGI application that processes
164 164 requests from the base URL of a repo.
165 165
166 166 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
167 167 is defined, it is used - instead of the WSGI environment variables - for
168 168 constructing URL components up to and including the WSGI application path.
169 169 For example, if the current WSGI application is at ``/repo`` and a request
170 170 is made to ``/rev/@`` with this argument set to
171 171 ``http://myserver:9000/prefix``, the URL and path components will resolve as
172 172 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
173 173 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
174 174 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.
175 175 """
176 176 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
177 177
178 178 # We first validate that the incoming object conforms with the WSGI spec.
179 179 # We only want to be dealing with spec-conforming WSGI implementations.
180 180 # TODO enable this once we fix internal violations.
181 181 #wsgiref.validate.check_environ(env)
182 182
183 183 # PEP-0333 states that environment keys and values are native strings
184 184 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
185 185 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
186 186 # in Mercurial, so mass convert string keys and values to bytes.
187 187 if pycompat.ispy3:
188 188 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
189 189 env = {k: v.encode('latin-1') if isinstance(v, str) else v
190 190 for k, v in env.iteritems()}
191 191
192 192 if altbaseurl:
193 193 altbaseurl = util.url(altbaseurl)
194 194
195 195 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
196 196 # the environment variables.
197 197 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
198 198 # how URLs are reconstructed.
199 199 fullurl = env['wsgi.url_scheme'] + '://'
200 200
201 201 if altbaseurl and altbaseurl.scheme:
202 202 advertisedfullurl = altbaseurl.scheme + '://'
203 203 else:
204 204 advertisedfullurl = fullurl
205 205
206 206 def addport(s, port):
207 207 if s.startswith('https://'):
208 208 if port != '443':
209 209 s += ':' + port
210 210 else:
211 211 if port != '80':
212 212 s += ':' + port
213 213
214 214 return s
215 215
216 216 if env.get('HTTP_HOST'):
217 217 fullurl += env['HTTP_HOST']
218 218 else:
219 219 fullurl += env['SERVER_NAME']
220 220 fullurl = addport(fullurl, env['SERVER_PORT'])
221 221
222 222 if altbaseurl and altbaseurl.host:
223 223 advertisedfullurl += altbaseurl.host
224 224
225 225 if altbaseurl.port:
226 226 port = altbaseurl.port
227 227 elif altbaseurl.scheme == 'http' and not altbaseurl.port:
228 228 port = '80'
229 229 elif altbaseurl.scheme == 'https' and not altbaseurl.port:
230 230 port = '443'
231 231 else:
232 232 port = env['SERVER_PORT']
233 233
234 234 advertisedfullurl = addport(advertisedfullurl, port)
235 235 else:
236 236 advertisedfullurl += env['SERVER_NAME']
237 237 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
238 238
239 239 baseurl = fullurl
240 240 advertisedbaseurl = advertisedfullurl
241 241
242 242 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
243 243 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
244 244
245 245 if altbaseurl:
246 246 path = altbaseurl.path or ''
247 247 if path and not path.startswith('/'):
248 248 path = '/' + path
249 249 advertisedfullurl += util.urlreq.quote(path)
250 250 else:
251 251 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
252 252
253 253 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
254 254
255 255 if env.get('QUERY_STRING'):
256 256 fullurl += '?' + env['QUERY_STRING']
257 257 advertisedfullurl += '?' + env['QUERY_STRING']
258 258
259 259 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
260 260 # that represents the repository being dispatched to. When computing
261 261 # the dispatch info, we ignore these leading path components.
262 262
263 263 if altbaseurl:
264 264 apppath = altbaseurl.path or ''
265 265 if apppath and not apppath.startswith('/'):
266 266 apppath = '/' + apppath
267 267 else:
268 268 apppath = env.get('SCRIPT_NAME', '')
269 269
270 270 if reponame:
271 271 repoprefix = '/' + reponame.strip('/')
272 272
273 273 if not env.get('PATH_INFO'):
274 274 raise error.ProgrammingError('reponame requires PATH_INFO')
275 275
276 276 if not env['PATH_INFO'].startswith(repoprefix):
277 277 raise error.ProgrammingError('PATH_INFO does not begin with repo '
278 278 'name: %s (%s)' % (env['PATH_INFO'],
279 279 reponame))
280 280
281 281 dispatchpath = env['PATH_INFO'][len(repoprefix):]
282 282
283 283 if dispatchpath and not dispatchpath.startswith('/'):
284 284 raise error.ProgrammingError('reponame prefix of PATH_INFO does '
285 285 'not end at path delimiter: %s (%s)' %
286 286 (env['PATH_INFO'], reponame))
287 287
288 288 apppath = apppath.rstrip('/') + repoprefix
289 289 dispatchparts = dispatchpath.strip('/').split('/')
290 290 dispatchpath = '/'.join(dispatchparts)
291 291
292 292 elif 'PATH_INFO' in env:
293 293 if env['PATH_INFO'].strip('/'):
294 294 dispatchparts = env['PATH_INFO'].strip('/').split('/')
295 295 dispatchpath = '/'.join(dispatchparts)
296 296 else:
297 297 dispatchparts = []
298 298 dispatchpath = ''
299 299 else:
300 300 dispatchparts = []
301 301 dispatchpath = None
302 302
303 303 querystring = env.get('QUERY_STRING', '')
304 304
305 305 # We store as a list so we have ordering information. We also store as
306 306 # a dict to facilitate fast lookup.
307 307 qsparams = multidict()
308 308 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
309 309 qsparams.add(k, v)
310 310
311 311 # HTTP_* keys contain HTTP request headers. The Headers structure should
312 312 # perform case normalization for us. We just rewrite underscore to dash
313 313 # so keys match what likely went over the wire.
314 314 headers = []
315 315 for k, v in env.iteritems():
316 316 if k.startswith('HTTP_'):
317 317 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
318 318
319 319 headers = wsgiheaders.Headers(headers)
320 320
321 321 # This is kind of a lie because the HTTP header wasn't explicitly
322 322 # sent. But for all intents and purposes it should be OK to lie about
323 323 # this, since a consumer will use either value to determine how many
324 324 # bytes are available to read.
325 325 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
326 326 headers['Content-Length'] = env['CONTENT_LENGTH']
327 327
328 # TODO do this once we remove wsgirequest.inp, otherwise we could have
329 # multiple readers from the underlying input stream.
330 #bodyfh = env['wsgi.input']
331 #if 'Content-Length' in headers:
332 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
328 bodyfh = env['wsgi.input']
329 if 'Content-Length' in headers:
330 bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
333 331
334 332 return parsedrequest(method=env['REQUEST_METHOD'],
335 333 url=fullurl, baseurl=baseurl,
336 334 advertisedurl=advertisedfullurl,
337 335 advertisedbaseurl=advertisedbaseurl,
338 336 urlscheme=env['wsgi.url_scheme'],
339 337 remoteuser=env.get('REMOTE_USER'),
340 338 remotehost=env.get('REMOTE_HOST'),
341 339 apppath=apppath,
342 340 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
343 341 reponame=reponame,
344 342 querystring=querystring,
345 343 qsparams=qsparams,
346 344 headers=headers,
347 345 bodyfh=bodyfh,
348 346 rawenv=env)
349 347
class offsettrackingwriter(object):
    """Append-only, file-like wrapper around a write callable.

    Every ``write()`` is forwarded to the callable given at construction
    time. The instance keeps a running count of written data so that it can
    answer ``tell()``.

    This exists to wrap the ``write()`` function returned by a WSGI
    ``start_response()`` call: that function is a bare callable rather than
    a file object, so it offers no other file object methods.
    """
    def __init__(self, writefn):
        self._write = writefn
        self._offset = 0

    def write(self, s):
        """Forward ``s`` to the wrapped callable and advance the offset."""
        written = self._write(s)
        # Callables that return None don't report a byte count; assume the
        # whole of ``s`` was written in that case.
        self._offset += len(s) if written is None else written

    def flush(self):
        """No-op; present only for file object compatibility."""
        pass

    def tell(self):
        """Return the running total accumulated by ``write()``."""
        return self._offset
380 378
class wsgiresponse(object):
    """Represents a response to a WSGI request.

    A response consists of a status line, headers, and a body.

    Consumers must populate the ``status`` and ``headers`` fields and
    make a call to a ``setbody*()`` method before the response can be
    issued.

    When it is time to start sending the response over the wire,
    ``sendresponse()`` is called. It handles emitting the header portion
    of the response message. It then yields chunks of body data to be
    written to the peer. Typically, the WSGI application itself calls
    and returns the value from ``sendresponse()``.
    """

    def __init__(self, req, startresponse):
        """Create an empty response tied to a specific request.

        ``req`` is a ``parsedrequest``. ``startresponse`` is the
        ``start_response`` function passed to the WSGI application.
        """
        self._req = req
        self._startresponse = startresponse

        self.status = None
        self.headers = wsgiheaders.Headers([])

        # Exactly one of the following three is set by a ``setbody*()`` call.
        self._bodybytes = None
        self._bodygen = None
        self._bodywillwrite = False
        self._started = False
        # Populated by ``sendresponse()`` when ``setbodywillwrite()`` was used.
        self._bodywritefn = None

    def _verifybody(self):
        """Raise ProgrammingError if a body has already been defined."""
        if (self._bodybytes is not None or self._bodygen is not None
            or self._bodywillwrite):
            raise error.ProgrammingError('cannot define body multiple times')

    def setbodybytes(self, b):
        """Define the response body as static bytes.

        The empty string signals that there is no response body.

        Also sets the ``Content-Length`` header to the length of ``b``.
        """
        self._verifybody()
        self._bodybytes = b
        self.headers['Content-Length'] = '%d' % len(b)

    def setbodygen(self, gen):
        """Define the response body as a generator of bytes."""
        self._verifybody()
        self._bodygen = gen

    def setbodywillwrite(self):
        """Signal an intent to use write() to emit the response body.

        **This is the least preferred way to send a body.**

        It is preferred for WSGI applications to emit a generator of chunks
        constituting the response body. However, some consumers can't emit
        data this way. So, WSGI provides a way to obtain a ``write(data)``
        function that can be used to synchronously perform an unbuffered
        write.

        Calling this function signals an intent to produce the body in this
        manner.
        """
        self._verifybody()
        self._bodywillwrite = True

    def sendresponse(self):
        """Send the generated response to the client.

        Before this is called, ``status`` must be set and one of
        ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
        be called.

        This is a generator: nothing happens until it is iterated. It yields
        the chunks of the response body.

        Calling this method multiple times is not allowed.

        Raises ProgrammingError if the response is incomplete or violates
        the 304 restrictions checked below.
        """
        if self._started:
            raise error.ProgrammingError('sendresponse() called multiple times')

        self._started = True

        if not self.status:
            raise error.ProgrammingError('status line not defined')

        if (self._bodybytes is None and self._bodygen is None
            and not self._bodywillwrite):
            raise error.ProgrammingError('response body not defined')

        # RFC 7232 Section 4.1 states that a 304 MUST generate one of
        # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
        # and SHOULD NOT generate other headers unless they could be used
        # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
        # states that no response body can be issued. Content-Length can
        # be sent. But if it is present, it should be the size of the response
        # that wasn't transferred.
        if self.status.startswith('304 '):
            # setbodybytes('') will set C-L to 0. This doesn't conform with the
            # spec. So remove it.
            if self.headers.get('Content-Length') == '0':
                del self.headers['Content-Length']

            # Strictly speaking, this is too strict. But until it causes
            # problems, let's be strict.
            badheaders = {k for k in self.headers.keys()
                          if k.lower() not in ('date', 'etag', 'expires',
                                               'cache-control',
                                               'content-location',
                                               'vary')}
            if badheaders:
                raise error.ProgrammingError(
                    'illegal header on 304 response: %s' %
                    ', '.join(sorted(badheaders)))

            if self._bodygen is not None or self._bodywillwrite:
                raise error.ProgrammingError("must use setbodybytes('') with "
                                             "304 responses")

        # Various HTTP clients (notably httplib) won't read the HTTP response
        # until the HTTP request has been sent in full. If servers (us) send a
        # response before the HTTP request has been fully sent, the connection
        # may deadlock because neither end is reading.
        #
        # We work around this by "draining" the request data before
        # sending any response in some conditions.
        drain = False
        close = False

        # If the client sent Expect: 100-continue, we assume it is smart enough
        # to deal with the server sending a response before reading the request.
        # (httplib doesn't do this.)
        if self._req.headers.get('Expect', '').lower() == '100-continue':
            pass
        # Only tend to request methods that have bodies. Strictly speaking,
        # we should sniff for a body. But this is fine for our existing
        # WSGI applications.
        elif self._req.method not in ('POST', 'PUT'):
            pass
        else:
            # If we don't know how much data to read, there's no guarantee
            # that we can drain the request responsibly. The WSGI
            # specification only says that servers *should* ensure the
            # input stream doesn't overrun the actual request. So there's
            # no guarantee that reading until EOF won't corrupt the stream
            # state.
            if not isinstance(self._req.bodyfh, util.cappedreader):
                close = True
            else:
                # We /could/ only drain certain HTTP response codes. But 200 and
                # non-200 wire protocol responses both require draining. Since
                # we have a capped reader in place for all situations where we
                # drain, it is safe to read from that stream. We'll either do
                # a drain or no-op if we're already at EOF.
                drain = True

        if close:
            self.headers['Connection'] = 'Close'

        if drain:
            assert isinstance(self._req.bodyfh, util.cappedreader)
            while True:
                chunk = self._req.bodyfh.read(32768)
                if not chunk:
                    break

        write = self._startresponse(pycompat.sysstr(self.status),
                                    self.headers.items())

        # Dispatch on which body kind was *defined* rather than on
        # truthiness: setbodybytes('') is the documented way to send an
        # empty body (and the only one allowed for 304), and it must not
        # fall through to the error branch at the bottom.
        if self._bodybytes is not None:
            # Nothing to emit for an empty body.
            if self._bodybytes:
                yield self._bodybytes
        elif self._bodygen is not None:
            for chunk in self._bodygen:
                yield chunk
        elif self._bodywillwrite:
            self._bodywritefn = write
        else:
            # Unreachable given the "response body not defined" check above;
            # kept as a guard against future body kinds.
            raise error.ProgrammingError('do not know how to send body')

    def getbodyfile(self):
        """Obtain a file object like object representing the response body.

        For this to work, you must call ``setbodywillwrite()`` and then
        ``sendresponse()`` first. ``sendresponse()`` is a generator and the
        function won't run to completion unless the generator is advanced. The
        generator yields no items. The easiest way to consume it is with
        ``list(res.sendresponse())``, which should resolve to an empty list -
        ``[]``.
        """
        if not self._bodywillwrite:
            raise error.ProgrammingError('must call setbodywillwrite() first')

        if not self._started:
            raise error.ProgrammingError('must call sendresponse() first; did '
                                         'you remember to consume it since it '
                                         'is a generator?')

        # Set only once the ``sendresponse()`` generator has run far enough
        # to call ``start_response``.
        assert self._bodywritefn
        return offsettrackingwriter(self._bodywritefn)
580 578
581 class wsgirequest(object):
582 """Higher-level API for a WSGI request.
583
584 WSGI applications are invoked with 2 arguments. They are used to
585 instantiate instances of this class, which provides higher-level APIs
586 for obtaining request parameters, writing HTTP output, etc.
587 """
588 def __init__(self, wsgienv, start_response, altbaseurl=None):
589 version = wsgienv[r'wsgi.version']
590 if (version < (1, 0)) or (version >= (2, 0)):
591 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
592 % version)
593
594 inp = wsgienv[r'wsgi.input']
595
596 if r'HTTP_CONTENT_LENGTH' in wsgienv:
597 inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
598 elif r'CONTENT_LENGTH' in wsgienv:
599 inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
600
601 self.err = wsgienv[r'wsgi.errors']
602 self.threaded = wsgienv[r'wsgi.multithread']
603 self.multiprocess = wsgienv[r'wsgi.multiprocess']
604 self.run_once = wsgienv[r'wsgi.run_once']
605 self.env = wsgienv
606 self.req = parserequestfromenv(wsgienv, inp, altbaseurl=altbaseurl)
607 self.res = wsgiresponse(self.req, start_response)
608
def wsgiapplication(app_maker):
    '''Compatibility shim for old CGI scripts.

    Calls ``app_maker()`` once, immediately, and returns a WSGI callable
    that forwards every request to the application it produced. New code
    should pass a plain hgweb() or hgwebdir() instance as the WSGI
    application instead.'''
    wrapped = app_maker()

    def run_wsgi(env, respond):
        response = wrapped(env, respond)
        return response

    return run_wsgi
@@ -1,416 +1,416
1 1 from __future__ import absolute_import, print_function
2 2
3 3 import unittest
4 4
5 5 from mercurial.hgweb import (
6 6 request as requestmod,
7 7 )
8 8 from mercurial import (
9 9 error,
10 10 )
11 11
12 12 DEFAULT_ENV = {
13 13 r'REQUEST_METHOD': r'GET',
14 14 r'SERVER_NAME': r'testserver',
15 15 r'SERVER_PORT': r'80',
16 16 r'SERVER_PROTOCOL': r'http',
17 17 r'wsgi.version': (1, 0),
18 18 r'wsgi.url_scheme': r'http',
19 19 r'wsgi.input': None,
20 20 r'wsgi.errors': None,
21 21 r'wsgi.multithread': False,
22 22 r'wsgi.multiprocess': True,
23 23 r'wsgi.run_once': False,
24 24 }
25 25
26 def parse(env, bodyfh=None, reponame=None, altbaseurl=None, extra=None):
26 def parse(env, reponame=None, altbaseurl=None, extra=None):
27 27 env = dict(env)
28 28 env.update(extra or {})
29 29
30 return requestmod.parserequestfromenv(env, bodyfh, reponame=reponame,
30 return requestmod.parserequestfromenv(env, reponame=reponame,
31 31 altbaseurl=altbaseurl)
32 32
33 33 class ParseRequestTests(unittest.TestCase):
34 34 def testdefault(self):
35 35 r = parse(DEFAULT_ENV)
36 36 self.assertEqual(r.url, b'http://testserver')
37 37 self.assertEqual(r.baseurl, b'http://testserver')
38 38 self.assertEqual(r.advertisedurl, b'http://testserver')
39 39 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
40 40 self.assertEqual(r.urlscheme, b'http')
41 41 self.assertEqual(r.method, b'GET')
42 42 self.assertIsNone(r.remoteuser)
43 43 self.assertIsNone(r.remotehost)
44 44 self.assertEqual(r.apppath, b'')
45 45 self.assertEqual(r.dispatchparts, [])
46 46 self.assertIsNone(r.dispatchpath)
47 47 self.assertIsNone(r.reponame)
48 48 self.assertEqual(r.querystring, b'')
49 49 self.assertEqual(len(r.qsparams), 0)
50 50 self.assertEqual(len(r.headers), 0)
51 51
52 52 def testcustomport(self):
53 53 r = parse(DEFAULT_ENV, extra={
54 54 r'SERVER_PORT': r'8000',
55 55 })
56 56
57 57 self.assertEqual(r.url, b'http://testserver:8000')
58 58 self.assertEqual(r.baseurl, b'http://testserver:8000')
59 59 self.assertEqual(r.advertisedurl, b'http://testserver:8000')
60 60 self.assertEqual(r.advertisedbaseurl, b'http://testserver:8000')
61 61
62 62 r = parse(DEFAULT_ENV, extra={
63 63 r'SERVER_PORT': r'4000',
64 64 r'wsgi.url_scheme': r'https',
65 65 })
66 66
67 67 self.assertEqual(r.url, b'https://testserver:4000')
68 68 self.assertEqual(r.baseurl, b'https://testserver:4000')
69 69 self.assertEqual(r.advertisedurl, b'https://testserver:4000')
70 70 self.assertEqual(r.advertisedbaseurl, b'https://testserver:4000')
71 71
72 72 def testhttphost(self):
73 73 r = parse(DEFAULT_ENV, extra={
74 74 r'HTTP_HOST': r'altserver',
75 75 })
76 76
77 77 self.assertEqual(r.url, b'http://altserver')
78 78 self.assertEqual(r.baseurl, b'http://altserver')
79 79 self.assertEqual(r.advertisedurl, b'http://testserver')
80 80 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
81 81
82 82 def testscriptname(self):
83 83 r = parse(DEFAULT_ENV, extra={
84 84 r'SCRIPT_NAME': r'',
85 85 })
86 86
87 87 self.assertEqual(r.url, b'http://testserver')
88 88 self.assertEqual(r.baseurl, b'http://testserver')
89 89 self.assertEqual(r.advertisedurl, b'http://testserver')
90 90 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
91 91 self.assertEqual(r.apppath, b'')
92 92 self.assertEqual(r.dispatchparts, [])
93 93 self.assertIsNone(r.dispatchpath)
94 94
95 95 r = parse(DEFAULT_ENV, extra={
96 96 r'SCRIPT_NAME': r'/script',
97 97 })
98 98
99 99 self.assertEqual(r.url, b'http://testserver/script')
100 100 self.assertEqual(r.baseurl, b'http://testserver')
101 101 self.assertEqual(r.advertisedurl, b'http://testserver/script')
102 102 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
103 103 self.assertEqual(r.apppath, b'/script')
104 104 self.assertEqual(r.dispatchparts, [])
105 105 self.assertIsNone(r.dispatchpath)
106 106
107 107 r = parse(DEFAULT_ENV, extra={
108 108 r'SCRIPT_NAME': r'/multiple words',
109 109 })
110 110
111 111 self.assertEqual(r.url, b'http://testserver/multiple%20words')
112 112 self.assertEqual(r.baseurl, b'http://testserver')
113 113 self.assertEqual(r.advertisedurl, b'http://testserver/multiple%20words')
114 114 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
115 115 self.assertEqual(r.apppath, b'/multiple words')
116 116 self.assertEqual(r.dispatchparts, [])
117 117 self.assertIsNone(r.dispatchpath)
118 118
119 119 def testpathinfo(self):
120 120 r = parse(DEFAULT_ENV, extra={
121 121 r'PATH_INFO': r'',
122 122 })
123 123
124 124 self.assertEqual(r.url, b'http://testserver')
125 125 self.assertEqual(r.baseurl, b'http://testserver')
126 126 self.assertEqual(r.advertisedurl, b'http://testserver')
127 127 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
128 128 self.assertEqual(r.apppath, b'')
129 129 self.assertEqual(r.dispatchparts, [])
130 130 self.assertEqual(r.dispatchpath, b'')
131 131
132 132 r = parse(DEFAULT_ENV, extra={
133 133 r'PATH_INFO': r'/pathinfo',
134 134 })
135 135
136 136 self.assertEqual(r.url, b'http://testserver/pathinfo')
137 137 self.assertEqual(r.baseurl, b'http://testserver')
138 138 self.assertEqual(r.advertisedurl, b'http://testserver/pathinfo')
139 139 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
140 140 self.assertEqual(r.apppath, b'')
141 141 self.assertEqual(r.dispatchparts, [b'pathinfo'])
142 142 self.assertEqual(r.dispatchpath, b'pathinfo')
143 143
144 144 r = parse(DEFAULT_ENV, extra={
145 145 r'PATH_INFO': r'/one/two/',
146 146 })
147 147
148 148 self.assertEqual(r.url, b'http://testserver/one/two/')
149 149 self.assertEqual(r.baseurl, b'http://testserver')
150 150 self.assertEqual(r.advertisedurl, b'http://testserver/one/two/')
151 151 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
152 152 self.assertEqual(r.apppath, b'')
153 153 self.assertEqual(r.dispatchparts, [b'one', b'two'])
154 154 self.assertEqual(r.dispatchpath, b'one/two')
155 155
156 156 def testscriptandpathinfo(self):
157 157 r = parse(DEFAULT_ENV, extra={
158 158 r'SCRIPT_NAME': r'/script',
159 159 r'PATH_INFO': r'/pathinfo',
160 160 })
161 161
162 162 self.assertEqual(r.url, b'http://testserver/script/pathinfo')
163 163 self.assertEqual(r.baseurl, b'http://testserver')
164 164 self.assertEqual(r.advertisedurl, b'http://testserver/script/pathinfo')
165 165 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
166 166 self.assertEqual(r.apppath, b'/script')
167 167 self.assertEqual(r.dispatchparts, [b'pathinfo'])
168 168 self.assertEqual(r.dispatchpath, b'pathinfo')
169 169
170 170 r = parse(DEFAULT_ENV, extra={
171 171 r'SCRIPT_NAME': r'/script1/script2',
172 172 r'PATH_INFO': r'/path1/path2',
173 173 })
174 174
175 175 self.assertEqual(r.url,
176 176 b'http://testserver/script1/script2/path1/path2')
177 177 self.assertEqual(r.baseurl, b'http://testserver')
178 178 self.assertEqual(r.advertisedurl,
179 179 b'http://testserver/script1/script2/path1/path2')
180 180 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
181 181 self.assertEqual(r.apppath, b'/script1/script2')
182 182 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
183 183 self.assertEqual(r.dispatchpath, b'path1/path2')
184 184
185 185 r = parse(DEFAULT_ENV, extra={
186 186 r'HTTP_HOST': r'hostserver',
187 187 r'SCRIPT_NAME': r'/script',
188 188 r'PATH_INFO': r'/pathinfo',
189 189 })
190 190
191 191 self.assertEqual(r.url, b'http://hostserver/script/pathinfo')
192 192 self.assertEqual(r.baseurl, b'http://hostserver')
193 193 self.assertEqual(r.advertisedurl, b'http://testserver/script/pathinfo')
194 194 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
195 195 self.assertEqual(r.apppath, b'/script')
196 196 self.assertEqual(r.dispatchparts, [b'pathinfo'])
197 197 self.assertEqual(r.dispatchpath, b'pathinfo')
198 198
def testreponame(self):
    """A leading repository name is split off PATH_INFO.

    Asserts that parse() raises ProgrammingError when ``reponame`` is
    given without a PATH_INFO that starts with it as a whole path
    component, and that otherwise the repository name shows up in
    ``apppath`` and ``reponame`` while the remaining components become
    the dispatch path.
    """
    # A reponame with no PATH_INFO at all is rejected.
    with self.assertRaisesRegexp(error.ProgrammingError,
                                 b'reponame requires PATH_INFO'):
        parse(DEFAULT_ENV, reponame=b'repo')

    # PATH_INFO values that do not start with the repo name as a full
    # path component, paired with the message each one must produce.
    mismatches = (
        (r'/pathinfo', b'PATH_INFO does not begin with repo name'),
        (r'/repoextra/path', b'reponame prefix of PATH_INFO'),
    )
    for pathinfo, message in mismatches:
        with self.assertRaisesRegexp(error.ProgrammingError, message):
            parse(DEFAULT_ENV, reponame=b'repo', extra={
                r'PATH_INFO': pathinfo,
            })

    # (reponame, PATH_INFO, expected full URL, expected apppath); the
    # second entry covers a repository name containing a slash.
    accepted = (
        (b'repo', r'/repo/path1/path2',
         b'http://testserver/repo/path1/path2', b'/repo'),
        (b'prefix/repo', r'/prefix/repo/path1/path2',
         b'http://testserver/prefix/repo/path1/path2', b'/prefix/repo'),
    )
    for name, pathinfo, fullurl, apppath in accepted:
        req = parse(DEFAULT_ENV, reponame=name, extra={
            r'PATH_INFO': pathinfo,
        })

        self.assertEqual(req.url, fullurl)
        self.assertEqual(req.baseurl, b'http://testserver')
        self.assertEqual(req.advertisedurl, fullurl)
        self.assertEqual(req.advertisedbaseurl, b'http://testserver')
        self.assertEqual(req.apppath, apppath)
        self.assertEqual(req.dispatchparts, [b'path1', b'path2'])
        self.assertEqual(req.dispatchpath, b'path1/path2')
        self.assertEqual(req.reponame, name)
245 245
def testaltbaseurl(self):
    """An alternate base URL changes advertised values only.

    Every case asserts that ``url``, ``baseurl`` and ``urlscheme`` keep
    describing the request as received on ``testserver``, while
    ``advertisedurl``, ``advertisedbaseurl`` and ``apppath`` are derived
    from ``altbaseurl``.
    """
    def verify(req, url, advertisedurl, advertisedbaseurl, apppath,
               haspath):
        # Shared assertions for requests parsed without a reponame.
        # ``haspath`` selects between the /path1/path2 dispatch
        # expectations and the "no dispatch path" expectations.
        self.assertEqual(req.url, url)
        self.assertEqual(req.baseurl, b'http://testserver')
        self.assertEqual(req.advertisedurl, advertisedurl)
        self.assertEqual(req.advertisedbaseurl, advertisedbaseurl)
        # URL scheme is defined as the actual scheme, not advertised.
        self.assertEqual(req.urlscheme, b'http')
        self.assertEqual(req.apppath, apppath)
        if haspath:
            self.assertEqual(req.dispatchparts, [b'path1', b'path2'])
            self.assertEqual(req.dispatchpath, b'path1/path2')
        else:
            self.assertEqual(req.dispatchparts, [])
            self.assertIsNone(req.dispatchpath)
        self.assertIsNone(req.reponame)

    # Simple hostname remap.
    req = parse(DEFAULT_ENV, altbaseurl='http://altserver')
    verify(req, b'http://testserver',
           b'http://altserver', b'http://altserver', b'', False)

    # With a custom port.
    req = parse(DEFAULT_ENV, altbaseurl='http://altserver:8000')
    verify(req, b'http://testserver',
           b'http://altserver:8000', b'http://altserver:8000', b'', False)

    # With a changed protocol.
    req = parse(DEFAULT_ENV, altbaseurl='https://altserver')
    verify(req, b'http://testserver',
           b'https://altserver', b'https://altserver', b'', False)

    # Need to specify explicit port number for proper https:// alt URLs.
    # The expected advertised URLs do not repeat the :443 port.
    req = parse(DEFAULT_ENV, altbaseurl='https://altserver:443')
    verify(req, b'http://testserver',
           b'https://altserver', b'https://altserver', b'', False)

    # With only PATH_INFO defined.
    req = parse(DEFAULT_ENV, altbaseurl='http://altserver', extra={
        r'PATH_INFO': r'/path1/path2',
    })
    verify(req, b'http://testserver/path1/path2',
           b'http://altserver/path1/path2', b'http://altserver', b'', True)

    # Path on alt URL.
    req = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath')
    verify(req, b'http://testserver',
           b'http://altserver/altpath', b'http://altserver',
           b'/altpath', False)

    # With a trailing slash.
    req = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath/')
    verify(req, b'http://testserver',
           b'http://altserver/altpath/', b'http://altserver',
           b'/altpath/', False)

    # PATH_INFO + path on alt URL.
    req = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath', extra={
        r'PATH_INFO': r'/path1/path2',
    })
    verify(req, b'http://testserver/path1/path2',
           b'http://altserver/altpath/path1/path2', b'http://altserver',
           b'/altpath', True)

    # PATH_INFO + path on alt URL with trailing slash. The expected
    # advertised URL keeps both slashes.
    req = parse(DEFAULT_ENV, altbaseurl='http://altserver/altpath/', extra={
        r'PATH_INFO': r'/path1/path2',
    })
    verify(req, b'http://testserver/path1/path2',
           b'http://altserver/altpath//path1/path2', b'http://altserver',
           b'/altpath/', True)

    # Local SCRIPT_NAME is ignored.
    req = parse(DEFAULT_ENV, altbaseurl='http://altserver', extra={
        r'SCRIPT_NAME': r'/script',
        r'PATH_INFO': r'/path1/path2',
    })
    verify(req, b'http://testserver/script/path1/path2',
           b'http://altserver/path1/path2', b'http://altserver', b'', True)

    # Use remote's path for script name, app path
    req = parse(DEFAULT_ENV, altbaseurl='http://altserver/altroot', extra={
        r'SCRIPT_NAME': r'/script',
        r'PATH_INFO': r'/path1/path2',
    })
    verify(req, b'http://testserver/script/path1/path2',
           b'http://altserver/altroot/path1/path2', b'http://altserver',
           b'/altroot', True)

    # reponame is factored in properly. This case is spelled out inline
    # because it does not check urlscheme and expects a non-None reponame.
    req = parse(DEFAULT_ENV, reponame=b'repo',
                altbaseurl='http://altserver/altroot',
                extra={
                    r'SCRIPT_NAME': r'/script',
                    r'PATH_INFO': r'/repo/path1/path2',
                })

    self.assertEqual(req.url, b'http://testserver/script/repo/path1/path2')
    self.assertEqual(req.baseurl, b'http://testserver')
    self.assertEqual(req.advertisedurl,
                     b'http://altserver/altroot/repo/path1/path2')
    self.assertEqual(req.advertisedbaseurl, b'http://altserver')
    self.assertEqual(req.apppath, b'/altroot/repo')
    self.assertEqual(req.dispatchparts, [b'path1', b'path2'])
    self.assertEqual(req.dispatchpath, b'path1/path2')
    self.assertEqual(req.reponame, b'repo')
413 413
# When executed directly as a script, pass this module's name to
# silenttestrunner.main(); the import stays local so that merely importing
# this module does not require silenttestrunner.
if __name__ == '__main__':
    import silenttestrunner
    silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now