##// END OF EJS Templates
hgweb: expose repo name on parsedrequest...
Gregory Szorc -
r36884:8ddb5c35 default
parent child Browse files
Show More
@@ -1,442 +1,443
1 1 # hgweb/hgweb_mod.py - Web interface for a repository.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import contextlib
12 12 import os
13 13
14 14 from .common import (
15 15 ErrorResponse,
16 16 HTTP_BAD_REQUEST,
17 17 HTTP_NOT_FOUND,
18 18 HTTP_NOT_MODIFIED,
19 19 HTTP_OK,
20 20 HTTP_SERVER_ERROR,
21 21 caching,
22 22 cspvalues,
23 23 permhooks,
24 24 )
25 25
26 26 from .. import (
27 27 encoding,
28 28 error,
29 29 formatter,
30 30 hg,
31 31 hook,
32 32 profiling,
33 33 pycompat,
34 34 repoview,
35 35 templatefilters,
36 36 templater,
37 37 ui as uimod,
38 38 util,
39 39 wireprotoserver,
40 40 )
41 41
42 42 from . import (
43 43 request as requestmod,
44 44 webcommands,
45 45 webutil,
46 46 wsgicgi,
47 47 )
48 48
49 49 archivespecs = util.sortdict((
50 50 ('zip', ('application/zip', 'zip', '.zip', None)),
51 51 ('gz', ('application/x-gzip', 'tgz', '.tar.gz', None)),
52 52 ('bz2', ('application/x-bzip2', 'tbz2', '.tar.bz2', None)),
53 53 ))
54 54
55 55 def getstyle(req, configfn, templatepath):
56 56 styles = (
57 57 req.qsparams.get('style', None),
58 58 configfn('web', 'style'),
59 59 'paper',
60 60 )
61 61 return styles, templater.stylemap(styles, templatepath)
62 62
63 63 def makebreadcrumb(url, prefix=''):
64 64 '''Return a 'URL breadcrumb' list
65 65
66 66 A 'URL breadcrumb' is a list of URL-name pairs,
67 67 corresponding to each of the path items on a URL.
68 68 This can be used to create path navigation entries.
69 69 '''
70 70 if url.endswith('/'):
71 71 url = url[:-1]
72 72 if prefix:
73 73 url = '/' + prefix + url
74 74 relpath = url
75 75 if relpath.startswith('/'):
76 76 relpath = relpath[1:]
77 77
78 78 breadcrumb = []
79 79 urlel = url
80 80 pathitems = [''] + relpath.split('/')
81 81 for pathel in reversed(pathitems):
82 82 if not pathel or not urlel:
83 83 break
84 84 breadcrumb.append({'url': urlel, 'name': pathel})
85 85 urlel = os.path.dirname(urlel)
86 86 return reversed(breadcrumb)
87 87
88 88 class requestcontext(object):
89 89 """Holds state/context for an individual request.
90 90
91 91 Servers can be multi-threaded. Holding state on the WSGI application
92 92 is prone to race conditions. Instances of this class exist to hold
93 93 mutable and race-free state for requests.
94 94 """
95 95 def __init__(self, app, repo):
96 96 self.repo = repo
97 97 self.reponame = app.reponame
98 98
99 99 self.archivespecs = archivespecs
100 100
101 101 self.maxchanges = self.configint('web', 'maxchanges')
102 102 self.stripecount = self.configint('web', 'stripes')
103 103 self.maxshortchanges = self.configint('web', 'maxshortchanges')
104 104 self.maxfiles = self.configint('web', 'maxfiles')
105 105 self.allowpull = self.configbool('web', 'allow-pull')
106 106
107 107 # we use untrusted=False to prevent a repo owner from using
108 108 # web.templates in .hg/hgrc to get access to any file readable
109 109 # by the user running the CGI script
110 110 self.templatepath = self.config('web', 'templates', untrusted=False)
111 111
112 112 # This object is more expensive to build than simple config values.
113 113 # It is shared across requests. The app will replace the object
114 114 # if it is updated. Since this is a reference and nothing should
115 115 # modify the underlying object, it should be constant for the lifetime
116 116 # of the request.
117 117 self.websubtable = app.websubtable
118 118
119 119 self.csp, self.nonce = cspvalues(self.repo.ui)
120 120
121 121 # Trust the settings from the .hg/hgrc files by default.
122 122 def config(self, section, name, default=uimod._unset, untrusted=True):
123 123 return self.repo.ui.config(section, name, default,
124 124 untrusted=untrusted)
125 125
126 126 def configbool(self, section, name, default=uimod._unset, untrusted=True):
127 127 return self.repo.ui.configbool(section, name, default,
128 128 untrusted=untrusted)
129 129
130 130 def configint(self, section, name, default=uimod._unset, untrusted=True):
131 131 return self.repo.ui.configint(section, name, default,
132 132 untrusted=untrusted)
133 133
134 134 def configlist(self, section, name, default=uimod._unset, untrusted=True):
135 135 return self.repo.ui.configlist(section, name, default,
136 136 untrusted=untrusted)
137 137
138 138 def archivelist(self, nodeid):
139 139 allowed = self.configlist('web', 'allow_archive')
140 140 for typ, spec in self.archivespecs.iteritems():
141 141 if typ in allowed or self.configbool('web', 'allow%s' % typ):
142 142 yield {'type': typ, 'extension': spec[2], 'node': nodeid}
143 143
144 def templater(self, wsgireq, req):
144 def templater(self, req):
145 145 # determine scheme, port and server name
146 146 # this is needed to create absolute urls
147 147 logourl = self.config('web', 'logourl')
148 148 logoimg = self.config('web', 'logoimg')
149 149 staticurl = (self.config('web', 'staticurl')
150 150 or req.apppath + '/static/')
151 151 if not staticurl.endswith('/'):
152 152 staticurl += '/'
153 153
154 154 # some functions for the templater
155 155
156 156 def motd(**map):
157 157 yield self.config('web', 'motd')
158 158
159 159 # figure out which style to use
160 160
161 161 vars = {}
162 styles, (style, mapfile) = getstyle(wsgireq.req, self.config,
162 styles, (style, mapfile) = getstyle(req, self.config,
163 163 self.templatepath)
164 164 if style == styles[0]:
165 165 vars['style'] = style
166 166
167 167 sessionvars = webutil.sessionvars(vars, '?')
168 168
169 169 if not self.reponame:
170 170 self.reponame = (self.config('web', 'name', '')
171 or wsgireq.env.get('REPO_NAME')
172 or req.apppath or self.repo.root)
171 or req.reponame
172 or req.apppath
173 or self.repo.root)
173 174
174 175 def websubfilter(text):
175 176 return templatefilters.websub(text, self.websubtable)
176 177
177 178 # create the templater
178 179 # TODO: export all keywords: defaults = templatekw.keywords.copy()
179 180 defaults = {
180 181 'url': req.apppath + '/',
181 182 'logourl': logourl,
182 183 'logoimg': logoimg,
183 184 'staticurl': staticurl,
184 185 'urlbase': req.advertisedbaseurl,
185 186 'repo': self.reponame,
186 187 'encoding': encoding.encoding,
187 188 'motd': motd,
188 189 'sessionvars': sessionvars,
189 190 'pathdef': makebreadcrumb(req.apppath),
190 191 'style': style,
191 192 'nonce': self.nonce,
192 193 }
193 194 tres = formatter.templateresources(self.repo.ui, self.repo)
194 195 tmpl = templater.templater.frommapfile(mapfile,
195 196 filters={'websub': websubfilter},
196 197 defaults=defaults,
197 198 resources=tres)
198 199 return tmpl
199 200
200 201
201 202 class hgweb(object):
202 203 """HTTP server for individual repositories.
203 204
204 205 Instances of this class serve HTTP responses for a particular
205 206 repository.
206 207
207 208 Instances are typically used as WSGI applications.
208 209
209 210 Some servers are multi-threaded. On these servers, there may
210 211 be multiple active threads inside __call__.
211 212 """
212 213 def __init__(self, repo, name=None, baseui=None):
213 214 if isinstance(repo, str):
214 215 if baseui:
215 216 u = baseui.copy()
216 217 else:
217 218 u = uimod.ui.load()
218 219 r = hg.repository(u, repo)
219 220 else:
220 221 # we trust caller to give us a private copy
221 222 r = repo
222 223
223 224 r.ui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
224 225 r.baseui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
225 226 r.ui.setconfig('ui', 'nontty', 'true', 'hgweb')
226 227 r.baseui.setconfig('ui', 'nontty', 'true', 'hgweb')
227 228 # resolve file patterns relative to repo root
228 229 r.ui.setconfig('ui', 'forcecwd', r.root, 'hgweb')
229 230 r.baseui.setconfig('ui', 'forcecwd', r.root, 'hgweb')
230 231 # displaying a bundling progress bar while serving feels wrong and may
231 232 # break some WSGI implementations.
232 233 r.ui.setconfig('progress', 'disable', 'true', 'hgweb')
233 234 r.baseui.setconfig('progress', 'disable', 'true', 'hgweb')
234 235 self._repos = [hg.cachedlocalrepo(self._webifyrepo(r))]
235 236 self._lastrepo = self._repos[0]
236 237 hook.redirect(True)
237 238 self.reponame = name
238 239
239 240 def _webifyrepo(self, repo):
240 241 repo = getwebview(repo)
241 242 self.websubtable = webutil.getwebsubs(repo)
242 243 return repo
243 244
244 245 @contextlib.contextmanager
245 246 def _obtainrepo(self):
246 247 """Obtain a repo unique to the caller.
247 248
248 249 Internally we maintain a stack of cachedlocalrepo instances
249 250 to be handed out. If one is available, we pop it and return it,
250 251 ensuring it is up to date in the process. If one is not available,
251 252 we clone the most recently used repo instance and return it.
252 253
253 254 It is currently possible for the stack to grow without bounds
254 255 if the server allows infinite threads. However, servers should
255 256 have a thread limit, thus establishing our limit.
256 257 """
257 258 if self._repos:
258 259 cached = self._repos.pop()
259 260 r, created = cached.fetch()
260 261 else:
261 262 cached = self._lastrepo.copy()
262 263 r, created = cached.fetch()
263 264 if created:
264 265 r = self._webifyrepo(r)
265 266
266 267 self._lastrepo = cached
267 268 self.mtime = cached.mtime
268 269 try:
269 270 yield r
270 271 finally:
271 272 self._repos.append(cached)
272 273
273 274 def run(self):
274 275 """Start a server from CGI environment.
275 276
276 277 Modern servers should be using WSGI and should avoid this
277 278 method, if possible.
278 279 """
279 280 if not encoding.environ.get('GATEWAY_INTERFACE',
280 281 '').startswith("CGI/1."):
281 282 raise RuntimeError("This function is only intended to be "
282 283 "called while running as a CGI script.")
283 284 wsgicgi.launch(self)
284 285
285 286 def __call__(self, env, respond):
286 287 """Run the WSGI application.
287 288
288 289 This may be called by multiple threads.
289 290 """
290 291 req = requestmod.wsgirequest(env, respond)
291 292 return self.run_wsgi(req)
292 293
293 294 def run_wsgi(self, wsgireq):
294 295 """Internal method to run the WSGI application.
295 296
296 297 This is typically only called by Mercurial. External consumers
297 298 should be using instances of this class as the WSGI application.
298 299 """
299 300 with self._obtainrepo() as repo:
300 301 profile = repo.ui.configbool('profiling', 'enabled')
301 302 with profiling.profile(repo.ui, enabled=profile):
302 303 for r in self._runwsgi(wsgireq, repo):
303 304 yield r
304 305
305 306 def _runwsgi(self, wsgireq, repo):
306 307 req = wsgireq.req
307 308 res = wsgireq.res
308 309 rctx = requestcontext(self, repo)
309 310
310 311 # This state is global across all threads.
311 312 encoding.encoding = rctx.config('web', 'encoding')
312 313 rctx.repo.ui.environ = wsgireq.env
313 314
314 315 if rctx.csp:
315 316 # hgwebdir may have added CSP header. Since we generate our own,
316 317 # replace it.
317 318 wsgireq.headers = [h for h in wsgireq.headers
318 319 if h[0] != 'Content-Security-Policy']
319 320 wsgireq.headers.append(('Content-Security-Policy', rctx.csp))
320 321 res.headers['Content-Security-Policy'] = rctx.csp
321 322
322 323 handled = wireprotoserver.handlewsgirequest(
323 324 rctx, wsgireq, req, res, self.check_perm)
324 325 if handled:
325 326 return res.sendresponse()
326 327
327 328 if req.havepathinfo:
328 329 query = req.dispatchpath
329 330 else:
330 331 query = req.querystring.partition('&')[0].partition(';')[0]
331 332
332 333 # translate user-visible url structure to internal structure
333 334
334 335 args = query.split('/', 2)
335 336 if 'cmd' not in req.qsparams and args and args[0]:
336 337 cmd = args.pop(0)
337 338 style = cmd.rfind('-')
338 339 if style != -1:
339 340 req.qsparams['style'] = cmd[:style]
340 341 cmd = cmd[style + 1:]
341 342
342 343 # avoid accepting e.g. style parameter as command
343 344 if util.safehasattr(webcommands, cmd):
344 345 req.qsparams['cmd'] = cmd
345 346
346 347 if cmd == 'static':
347 348 req.qsparams['file'] = '/'.join(args)
348 349 else:
349 350 if args and args[0]:
350 351 node = args.pop(0).replace('%2F', '/')
351 352 req.qsparams['node'] = node
352 353 if args:
353 354 if 'file' in req.qsparams:
354 355 del req.qsparams['file']
355 356 for a in args:
356 357 req.qsparams.add('file', a)
357 358
358 359 ua = req.headers.get('User-Agent', '')
359 360 if cmd == 'rev' and 'mercurial' in ua:
360 361 req.qsparams['style'] = 'raw'
361 362
362 363 if cmd == 'archive':
363 364 fn = req.qsparams['node']
364 365 for type_, spec in rctx.archivespecs.iteritems():
365 366 ext = spec[2]
366 367 if fn.endswith(ext):
367 368 req.qsparams['node'] = fn[:-len(ext)]
368 369 req.qsparams['type'] = type_
369 370 else:
370 371 cmd = req.qsparams.get('cmd', '')
371 372
372 373 # process the web interface request
373 374
374 375 try:
375 tmpl = rctx.templater(wsgireq, req)
376 tmpl = rctx.templater(req)
376 377 ctype = tmpl('mimetype', encoding=encoding.encoding)
377 378 ctype = templater.stringify(ctype)
378 379
379 380 # check read permissions non-static content
380 381 if cmd != 'static':
381 382 self.check_perm(rctx, wsgireq, None)
382 383
383 384 if cmd == '':
384 385 req.qsparams['cmd'] = tmpl.cache['default']
385 386 cmd = req.qsparams['cmd']
386 387
387 388 # Don't enable caching if using a CSP nonce because then it wouldn't
388 389 # be a nonce.
389 390 if rctx.configbool('web', 'cache') and not rctx.nonce:
390 391 caching(self, wsgireq) # sets ETag header or raises NOT_MODIFIED
391 392 if cmd not in webcommands.__all__:
392 393 msg = 'no such method: %s' % cmd
393 394 raise ErrorResponse(HTTP_BAD_REQUEST, msg)
394 395 elif cmd == 'file' and req.qsparams.get('style') == 'raw':
395 396 rctx.ctype = ctype
396 397 content = webcommands.rawfile(rctx, wsgireq, tmpl)
397 398 else:
398 399 content = getattr(webcommands, cmd)(rctx, wsgireq, tmpl)
399 400 wsgireq.respond(HTTP_OK, ctype)
400 401
401 402 return content
402 403
403 404 except (error.LookupError, error.RepoLookupError) as err:
404 405 wsgireq.respond(HTTP_NOT_FOUND, ctype)
405 406 msg = pycompat.bytestr(err)
406 407 if (util.safehasattr(err, 'name') and
407 408 not isinstance(err, error.ManifestLookupError)):
408 409 msg = 'revision not found: %s' % err.name
409 410 return tmpl('error', error=msg)
410 411 except (error.RepoError, error.RevlogError) as inst:
411 412 wsgireq.respond(HTTP_SERVER_ERROR, ctype)
412 413 return tmpl('error', error=pycompat.bytestr(inst))
413 414 except ErrorResponse as inst:
414 415 wsgireq.respond(inst, ctype)
415 416 if inst.code == HTTP_NOT_MODIFIED:
416 417 # Not allowed to return a body on a 304
417 418 return ['']
418 419 return tmpl('error', error=pycompat.bytestr(inst))
419 420
420 421 def check_perm(self, rctx, req, op):
421 422 for permhook in permhooks:
422 423 permhook(rctx, req, op)
423 424
424 425 def getwebview(repo):
425 426 """The 'web.view' config controls changeset filter to hgweb. Possible
426 427 values are ``served``, ``visible`` and ``all``. Default is ``served``.
427 428 The ``served`` filter only shows changesets that can be pulled from the
428 429 hgweb instance. The ``visible`` filter includes secret changesets but
429 430 still excludes "hidden" ones.
430 431
431 432 See the repoview module for details.
432 433
433 434 The option has been around undocumented since Mercurial 2.5, but no
434 435 user ever asked about it. So we better keep it undocumented for now."""
435 436 # experimental config: web.view
436 437 viewconfig = repo.ui.config('web', 'view', untrusted=True)
437 438 if viewconfig == 'all':
438 439 return repo.unfiltered()
439 440 elif viewconfig in repoview.filtertable:
440 441 return repo.filtered(viewconfig)
441 442 else:
442 443 return repo.filtered('served')
@@ -1,547 +1,550
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import socket
13 13 import wsgiref.headers as wsgiheaders
14 14 #import wsgiref.validate
15 15
16 16 from .common import (
17 17 ErrorResponse,
18 18 HTTP_NOT_MODIFIED,
19 19 statusmessage,
20 20 )
21 21
22 22 from ..thirdparty import (
23 23 attr,
24 24 )
25 25 from .. import (
26 26 error,
27 27 pycompat,
28 28 util,
29 29 )
30 30
31 31 class multidict(object):
32 32 """A dict like object that can store multiple values for a key.
33 33
34 34 Used to store parsed request parameters.
35 35
36 36 This is inspired by WebOb's class of the same name.
37 37 """
38 38 def __init__(self):
39 39 # Stores (key, value) 2-tuples. This isn't the most efficient. But we
40 40 # don't rely on parameters that much, so it shouldn't be a perf issue.
41 41 # We can always add a dict for fast lookups.
42 42 self._items = []
43 43
44 44 def __getitem__(self, key):
45 45 """Returns the last set value for a key."""
46 46 for k, v in reversed(self._items):
47 47 if k == key:
48 48 return v
49 49
50 50 raise KeyError(key)
51 51
52 52 def __setitem__(self, key, value):
53 53 """Replace all values for a key with a new value."""
54 54 try:
55 55 del self[key]
56 56 except KeyError:
57 57 pass
58 58
59 59 self._items.append((key, value))
60 60
61 61 def __delitem__(self, key):
62 62 """Delete all values for a key."""
63 63 oldlen = len(self._items)
64 64
65 65 self._items[:] = [(k, v) for k, v in self._items if k != key]
66 66
67 67 if oldlen == len(self._items):
68 68 raise KeyError(key)
69 69
70 70 def __contains__(self, key):
71 71 return any(k == key for k, v in self._items)
72 72
73 73 def __len__(self):
74 74 return len(self._items)
75 75
76 76 def get(self, key, default=None):
77 77 try:
78 78 return self.__getitem__(key)
79 79 except KeyError:
80 80 return default
81 81
82 82 def add(self, key, value):
83 83 """Add a new value for a key. Does not replace existing values."""
84 84 self._items.append((key, value))
85 85
86 86 def getall(self, key):
87 87 """Obtains all values for a key."""
88 88 return [v for k, v in self._items if k == key]
89 89
90 90 def getone(self, key):
91 91 """Obtain a single value for a key.
92 92
93 93 Raises KeyError if key not defined or it has multiple values set.
94 94 """
95 95 vals = self.getall(key)
96 96
97 97 if not vals:
98 98 raise KeyError(key)
99 99
100 100 if len(vals) > 1:
101 101 raise KeyError('multiple values for %r' % key)
102 102
103 103 return vals[0]
104 104
105 105 def asdictoflists(self):
106 106 d = {}
107 107 for k, v in self._items:
108 108 if k in d:
109 109 d[k].append(v)
110 110 else:
111 111 d[k] = [v]
112 112
113 113 return d
114 114
115 115 @attr.s(frozen=True)
116 116 class parsedrequest(object):
117 117 """Represents a parsed WSGI request.
118 118
119 119 Contains both parsed parameters as well as a handle on the input stream.
120 120 """
121 121
122 122 # Request method.
123 123 method = attr.ib()
124 124 # Full URL for this request.
125 125 url = attr.ib()
126 126 # URL without any path components. Just <proto>://<host><port>.
127 127 baseurl = attr.ib()
128 128 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
129 129 # of HTTP: Host header for hostname. This is likely what clients used.
130 130 advertisedurl = attr.ib()
131 131 advertisedbaseurl = attr.ib()
132 132 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
133 133 urlscheme = attr.ib()
134 134 # Value of REMOTE_USER, if set, or None.
135 135 remoteuser = attr.ib()
136 136 # Value of REMOTE_HOST, if set, or None.
137 137 remotehost = attr.ib()
138 138 # WSGI application path.
139 139 apppath = attr.ib()
140 140 # List of path parts to be used for dispatch.
141 141 dispatchparts = attr.ib()
142 142 # URL path component (no query string) used for dispatch.
143 143 dispatchpath = attr.ib()
144 144 # Whether there is a path component to this request. This can be true
145 145 # when ``dispatchpath`` is empty due to REPO_NAME muckery.
146 146 havepathinfo = attr.ib()
147 # The name of the repository being accessed.
148 reponame = attr.ib()
147 149 # Raw query string (part after "?" in URL).
148 150 querystring = attr.ib()
149 151 # multidict of query string parameters.
150 152 qsparams = attr.ib()
151 153 # wsgiref.headers.Headers instance. Operates like a dict with case
152 154 # insensitive keys.
153 155 headers = attr.ib()
154 156 # Request body input stream.
155 157 bodyfh = attr.ib()
156 158
157 159 def parserequestfromenv(env, bodyfh):
158 160 """Parse URL components from environment variables.
159 161
160 162 WSGI defines request attributes via environment variables. This function
161 163 parses the environment variables into a data structure.
162 164 """
163 165 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
164 166
165 167 # We first validate that the incoming object conforms with the WSGI spec.
166 168 # We only want to be dealing with spec-conforming WSGI implementations.
167 169 # TODO enable this once we fix internal violations.
168 170 #wsgiref.validate.check_environ(env)
169 171
170 172 # PEP-0333 states that environment keys and values are native strings
171 173 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
172 174 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
173 175 # in Mercurial, so mass convert string keys and values to bytes.
174 176 if pycompat.ispy3:
175 177 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
176 178 env = {k: v.encode('latin-1') if isinstance(v, str) else v
177 179 for k, v in env.iteritems()}
178 180
179 181 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
180 182 # the environment variables.
181 183 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
182 184 # how URLs are reconstructed.
183 185 fullurl = env['wsgi.url_scheme'] + '://'
184 186 advertisedfullurl = fullurl
185 187
186 188 def addport(s):
187 189 if env['wsgi.url_scheme'] == 'https':
188 190 if env['SERVER_PORT'] != '443':
189 191 s += ':' + env['SERVER_PORT']
190 192 else:
191 193 if env['SERVER_PORT'] != '80':
192 194 s += ':' + env['SERVER_PORT']
193 195
194 196 return s
195 197
196 198 if env.get('HTTP_HOST'):
197 199 fullurl += env['HTTP_HOST']
198 200 else:
199 201 fullurl += env['SERVER_NAME']
200 202 fullurl = addport(fullurl)
201 203
202 204 advertisedfullurl += env['SERVER_NAME']
203 205 advertisedfullurl = addport(advertisedfullurl)
204 206
205 207 baseurl = fullurl
206 208 advertisedbaseurl = advertisedfullurl
207 209
208 210 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
209 211 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
210 212 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
211 213 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
212 214
213 215 if env.get('QUERY_STRING'):
214 216 fullurl += '?' + env['QUERY_STRING']
215 217 advertisedfullurl += '?' + env['QUERY_STRING']
216 218
217 219 # When dispatching requests, we look at the URL components (PATH_INFO
218 220 # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
219 221 # has the concept of "virtual" repositories. This is defined via REPO_NAME.
220 222 # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
221 223 # root. We also exclude its path components from PATH_INFO when resolving
222 224 # the dispatch path.
223 225
224 226 apppath = env['SCRIPT_NAME']
225 227
226 228 if env.get('REPO_NAME'):
227 229 if not apppath.endswith('/'):
228 230 apppath += '/'
229 231
230 232 apppath += env.get('REPO_NAME')
231 233
232 234 if 'PATH_INFO' in env:
233 235 dispatchparts = env['PATH_INFO'].strip('/').split('/')
234 236
235 237 # Strip out repo parts.
236 238 repoparts = env.get('REPO_NAME', '').split('/')
237 239 if dispatchparts[:len(repoparts)] == repoparts:
238 240 dispatchparts = dispatchparts[len(repoparts):]
239 241 else:
240 242 dispatchparts = []
241 243
242 244 dispatchpath = '/'.join(dispatchparts)
243 245
244 246 querystring = env.get('QUERY_STRING', '')
245 247
246 248 # We store the parameters in a multidict so we keep ordering information
247 249 # and can hold multiple values for the same key.
248 250 qsparams = multidict()
249 251 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
250 252 qsparams.add(k, v)
251 253
252 254 # HTTP_* keys contain HTTP request headers. The Headers structure should
253 255 # perform case normalization for us. We just rewrite underscore to dash
254 256 # so keys match what likely went over the wire.
255 257 headers = []
256 258 for k, v in env.iteritems():
257 259 if k.startswith('HTTP_'):
258 260 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
259 261
260 262 headers = wsgiheaders.Headers(headers)
261 263
262 264 # This is kind of a lie because the HTTP header wasn't explicitly
263 265 # sent. But for all intents and purposes it should be OK to lie about
264 266 # this, since a consumer will use either value to determine how many
265 267 # bytes are available to read.
266 268 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
267 269 headers['Content-Length'] = env['CONTENT_LENGTH']
268 270
269 271 # TODO do this once we remove wsgirequest.inp, otherwise we could have
270 272 # multiple readers from the underlying input stream.
271 273 #bodyfh = env['wsgi.input']
272 274 #if 'Content-Length' in headers:
273 275 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
274 276
275 277 return parsedrequest(method=env['REQUEST_METHOD'],
276 278 url=fullurl, baseurl=baseurl,
277 279 advertisedurl=advertisedfullurl,
278 280 advertisedbaseurl=advertisedbaseurl,
279 281 urlscheme=env['wsgi.url_scheme'],
280 282 remoteuser=env.get('REMOTE_USER'),
281 283 remotehost=env.get('REMOTE_HOST'),
282 284 apppath=apppath,
283 285 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
284 286 havepathinfo='PATH_INFO' in env,
287 reponame=env.get('REPO_NAME'),
285 288 querystring=querystring,
286 289 qsparams=qsparams,
287 290 headers=headers,
288 291 bodyfh=bodyfh)
289 292
290 293 class wsgiresponse(object):
291 294 """Represents a response to a WSGI request.
292 295
293 296 A response consists of a status line, headers, and a body.
294 297
295 298 Consumers must populate the ``status`` and ``headers`` fields and
296 299 make a call to a ``setbody*()`` method before the response can be
297 300 issued.
298 301
299 302 When it is time to start sending the response over the wire,
300 303 ``sendresponse()`` is called. It handles emitting the header portion
301 304 of the response message. It then yields chunks of body data to be
302 305 written to the peer. Typically, the WSGI application itself calls
303 306 and returns the value from ``sendresponse()``.
304 307 """
305 308
306 309 def __init__(self, req, startresponse):
307 310 """Create an empty response tied to a specific request.
308 311
309 312 ``req`` is a ``parsedrequest``. ``startresponse`` is the
310 313 ``start_response`` function passed to the WSGI application.
311 314 """
312 315 self._req = req
313 316 self._startresponse = startresponse
314 317
315 318 self.status = None
316 319 self.headers = wsgiheaders.Headers([])
317 320
318 321 self._bodybytes = None
319 322 self._bodygen = None
320 323 self._started = False
321 324
322 325 def setbodybytes(self, b):
323 326 """Define the response body as static bytes."""
324 327 if self._bodybytes is not None or self._bodygen is not None:
325 328 raise error.ProgrammingError('cannot define body multiple times')
326 329
327 330 self._bodybytes = b
328 331 self.headers['Content-Length'] = '%d' % len(b)
329 332
330 333 def setbodygen(self, gen):
331 334 """Define the response body as a generator of bytes."""
332 335 if self._bodybytes is not None or self._bodygen is not None:
333 336 raise error.ProgrammingError('cannot define body multiple times')
334 337
335 338 self._bodygen = gen
336 339
337 340 def sendresponse(self):
338 341 """Send the generated response to the client.
339 342
340 343 Before this is called, ``status`` must be set and one of
341 344 ``setbodybytes()`` or ``setbodygen()`` must be called.
342 345
343 346 Calling this method multiple times is not allowed.
344 347 """
345 348 if self._started:
346 349 raise error.ProgrammingError('sendresponse() called multiple times')
347 350
348 351 self._started = True
349 352
350 353 if not self.status:
351 354 raise error.ProgrammingError('status line not defined')
352 355
353 356 if self._bodybytes is None and self._bodygen is None:
354 357 raise error.ProgrammingError('response body not defined')
355 358
356 359 # Various HTTP clients (notably httplib) won't read the HTTP response
357 360 # until the HTTP request has been sent in full. If servers (us) send a
358 361 # response before the HTTP request has been fully sent, the connection
359 362 # may deadlock because neither end is reading.
360 363 #
361 364 # We work around this by "draining" the request data before
362 365 # sending any response in some conditions.
363 366 drain = False
364 367 close = False
365 368
366 369 # If the client sent Expect: 100-continue, we assume it is smart enough
367 370 # to deal with the server sending a response before reading the request.
368 371 # (httplib doesn't do this.)
369 372 if self._req.headers.get('Expect', '').lower() == '100-continue':
370 373 pass
371 374 # Only tend to request methods that have bodies. Strictly speaking,
372 375 # we should sniff for a body. But this is fine for our existing
373 376 # WSGI applications.
374 377 elif self._req.method not in ('POST', 'PUT'):
375 378 pass
376 379 else:
377 380 # If we don't know how much data to read, there's no guarantee
378 381 # that we can drain the request responsibly. The WSGI
379 382 # specification only says that servers *should* ensure the
380 383 # input stream doesn't overrun the actual request. So there's
381 384 # no guarantee that reading until EOF won't corrupt the stream
382 385 # state.
383 386 if not isinstance(self._req.bodyfh, util.cappedreader):
384 387 close = True
385 388 else:
386 389 # We /could/ only drain certain HTTP response codes. But 200 and
387 390 # non-200 wire protocol responses both require draining. Since
388 391 # we have a capped reader in place for all situations where we
389 392 # drain, it is safe to read from that stream. We'll either do
390 393 # a drain or no-op if we're already at EOF.
391 394 drain = True
392 395
393 396 if close:
394 397 self.headers['Connection'] = 'Close'
395 398
396 399 if drain:
397 400 assert isinstance(self._req.bodyfh, util.cappedreader)
398 401 while True:
399 402 chunk = self._req.bodyfh.read(32768)
400 403 if not chunk:
401 404 break
402 405
403 406 self._startresponse(pycompat.sysstr(self.status), self.headers.items())
404 407 if self._bodybytes:
405 408 yield self._bodybytes
406 409 elif self._bodygen:
407 410 for chunk in self._bodygen:
408 411 yield chunk
409 412 else:
410 413 error.ProgrammingError('do not know how to send body')
411 414
class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """
    def __init__(self, wsgienv, start_response):
        """Wrap a WSGI environ dict and ``start_response`` callable.

        Raises RuntimeError if ``wsgienv['wsgi.version']`` is not a 1.x
        version.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)

        inp = wsgienv[r'wsgi.input']

        # When a content length is advertised, cap the input stream so
        # reads cannot go past the request body. HTTP_CONTENT_LENGTH takes
        # precedence over CONTENT_LENGTH.
        if r'HTTP_CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
        elif r'CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))

        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.req = parserequestfromenv(wsgienv, inp)
        self.res = wsgiresponse(self.req, start_response)
        # Set to None once the status line and headers have been sent.
        self._start_response = start_response
        # Callable returned by start_response(); only set while a body is
        # being written.
        self.server_write = None
        # Pending (name, value) response headers.
        self.headers = []

    def respond(self, status, type, filename=None, body=None):
        """Send the status line and headers, then optionally a body.

        ``status`` may be an ``ErrorResponse``, an integer HTTP status code
        or an already formatted status string. ``type`` is the value for
        the Content-Type header. ``filename``, if set, adds a
        Content-Disposition header. ``body``, if not None, adds a
        Content-Length header and is written after the headers.

        Headers are only emitted on the first call; later calls just write
        ``body``.

        Raises TypeError if a pending header value is not a ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                # Keep only the last path component and escape characters
                # that are special inside a quoted string.
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                if status.code == HTTP_NOT_MODIFIED:
                    # RFC 2616 Section 10.3.5: 304 Not Modified has cases
                    # where it MUST NOT include any headers other than these
                    # and no body
                    self.headers = [(k, v) for (k, v) in self.headers if
                                    k in ('Date', 'ETag', 'Expires',
                                          'Cache-Control', 'Vary')]
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            # Various HTTP clients (notably httplib) won't read the HTTP
            # response until the HTTP request has been sent in full. If
            # servers (us) send a response before the HTTP request has been
            # fully sent, the connection may deadlock because neither end is
            # reading.
            #
            # We work around this by "draining" the request data before
            # sending any response in some conditions.
            drain = False
            close = False

            # If the client sent Expect: 100-continue, we assume it is smart
            # enough to deal with the server sending a response before
            # reading the request. (httplib doesn't do this.)
            if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
                pass
            # Only tend to request methods that have bodies. Strictly
            # speaking, we should sniff for a body. But this is fine for our
            # existing WSGI applications.
            elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
                pass
            else:
                # If we don't know how much data to read, there's no
                # guarantee that we can drain the request responsibly. The
                # WSGI specification only says that servers *should* ensure
                # the input stream doesn't overrun the actual request. So
                # there's no guarantee that reading until EOF won't corrupt
                # the stream state.
                if not isinstance(self.req.bodyfh, util.cappedreader):
                    close = True
                else:
                    # We /could/ only drain certain HTTP response codes. But
                    # 200 and non-200 wire protocol responses both require
                    # draining. Since we have a capped reader in place for
                    # all situations where we drain, it is safe to read from
                    # that stream. We'll either do a drain or no-op if we're
                    # already at EOF.
                    drain = True

            if close:
                self.headers.append((r'Connection', r'Close'))

            if drain:
                assert isinstance(self.req.bodyfh, util.cappedreader)
                while True:
                    chunk = self.req.bodyfh.read(32768)
                    if not chunk:
                        break

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            self._start_response = None
            self.headers = []
        if body is not None:
            self.write(body)
            self.server_write = None

    def write(self, thing):
        """Pass ``thing`` to the server's write callable if it is non-empty.

        A socket error whose errno is ECONNRESET is ignored; any other
        socket error is re-raised.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Exception instances cannot be indexed on Python 3, so
                # read the error number from the ``errno`` attribute, which
                # exists on both Python 2 and Python 3.
                if inst.errno != errno.ECONNRESET:
                    raise

    def flush(self):
        """Do nothing; always returns None."""
        return None
540 543
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts. A plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application.

    ``app_maker`` is called once, with no arguments, when this function
    runs. The returned callable forwards ``(env, respond)`` to the object
    it produced and returns that object's result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        # Delegate every request to the application built above.
        result = wsgiapp(env, respond)
        return result

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now