##// END OF EJS Templates
hgweb: refactor 304 handling code...
Gregory Szorc -
r36894:ccb70a77 default
parent child Browse files
Show More
@@ -1,456 +1,455 b''
1 1 # hgweb/hgweb_mod.py - Web interface for a repository.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import contextlib
12 12 import os
13 13
14 14 from .common import (
15 15 ErrorResponse,
16 16 HTTP_BAD_REQUEST,
17 17 HTTP_NOT_FOUND,
18 HTTP_NOT_MODIFIED,
19 18 HTTP_OK,
20 19 HTTP_SERVER_ERROR,
21 20 cspvalues,
22 21 permhooks,
23 22 )
24 23
25 24 from .. import (
26 25 encoding,
27 26 error,
28 27 formatter,
29 28 hg,
30 29 hook,
31 30 profiling,
32 31 pycompat,
33 32 repoview,
34 33 templatefilters,
35 34 templater,
36 35 ui as uimod,
37 36 util,
38 37 wireprotoserver,
39 38 )
40 39
41 40 from . import (
42 41 request as requestmod,
43 42 webcommands,
44 43 webutil,
45 44 wsgicgi,
46 45 )
47 46
# Archive formats that hgweb knows how to serve, keyed by archive type name.
# Consumers in this file read index 2 of each value as the file extension.
# NOTE(review): the other tuple fields look like (mimetype, archive kind,
# extension, encoding) -- confirm against the archive command before relying
# on that.
archivespecs = util.sortdict((
    ('zip', ('application/zip', 'zip', '.zip', None)),
    ('gz', ('application/x-gzip', 'tgz', '.tar.gz', None)),
    ('bz2', ('application/x-bzip2', 'tbz2', '.tar.bz2', None)),
))
53 52
def getstyle(req, configfn, templatepath):
    """Collect the candidate template styles for a request and resolve them.

    The candidates are, in this order: the ``style`` query string parameter
    (``None`` when absent), the ``web.style`` value obtained via
    ``configfn``, and the literal fallback ``paper``.

    Returns a 2-tuple of ``(candidates, resolved)`` where ``resolved`` is
    whatever ``templater.stylemap()`` returns for the candidates; callers in
    this file unpack it as ``(style, mapfile)``.
    """
    requested = req.qsparams.get('style', None)
    configured = configfn('web', 'style')
    styles = (requested, configured, 'paper')
    return styles, templater.stylemap(styles, templatepath)
61 60
def makebreadcrumb(url, prefix=''):
    '''Return a 'URL breadcrumb' list

    A 'URL breadcrumb' is a list of URL-name pairs,
    corresponding to each of the path items on a URL.
    This can be used to create path navigation entries.

    ``url`` is the URL path to split; a single trailing slash is ignored.
    ``prefix``, when non-empty, is prepended to the path as an extra leading
    component.

    Each entry is a dict with the keys ``url`` (the cumulative path up to and
    including that component) and ``name`` (the component itself). Entries
    are ordered from the outermost component to the innermost one. An empty
    path yields an empty list.
    '''
    if url.endswith('/'):
        url = url[:-1]
    if prefix:
        url = '/' + prefix + url
    relpath = url
    if relpath.startswith('/'):
        relpath = relpath[1:]

    breadcrumb = []
    urlel = url
    # The leading '' is a sentinel: walking the components innermost-first,
    # reaching it (or running out of URL) terminates the loop.
    pathitems = [''] + relpath.split('/')
    for pathel in reversed(pathitems):
        if not pathel or not urlel:
            break
        breadcrumb.append({'url': urlel, 'name': pathel})
        urlel = os.path.dirname(urlel)

    # Return a real list, as documented. A reversed() iterator can only be
    # consumed once, so a second pass over the result would silently see no
    # entries.
    breadcrumb.reverse()
    return breadcrumb
86 85
class requestcontext(object):
    """Per-request state for hgweb.

    Servers can be multi-threaded, so keeping request state on the WSGI
    application object invites race conditions. One instance of this class
    is created per request to hold that state instead.
    """
    def __init__(self, app, repo, req, res):
        self.repo = repo
        self.reponame = app.reponame
        self.req = req
        self.res = res

        self.archivespecs = archivespecs

        # Snapshot the frequently used ``web.*`` settings.
        self.maxchanges = self.configint('web', 'maxchanges')
        self.stripecount = self.configint('web', 'stripes')
        self.maxshortchanges = self.configint('web', 'maxshortchanges')
        self.maxfiles = self.configint('web', 'maxfiles')
        self.allowpull = self.configbool('web', 'allow-pull')

        # untrusted=False here prevents a repo owner from pointing
        # web.templates (via .hg/hgrc) at arbitrary files readable by the
        # user running the CGI script.
        self.templatepath = self.config('web', 'templates', untrusted=False)

        # This table is more expensive to build than plain config values, so
        # it is shared across requests. The app replaces the object when it
        # is updated; since we only hold a reference and nothing should
        # mutate the underlying object, it stays constant for the lifetime
        # of this request.
        self.websubtable = app.websubtable

        self.csp, self.nonce = cspvalues(self.repo.ui)

    # The config accessors below trust settings from .hg/hgrc files by
    # default (untrusted=True).
    def config(self, section, name, default=uimod._unset, untrusted=True):
        """Read a config value from the repo's ui."""
        ui = self.repo.ui
        return ui.config(section, name, default, untrusted=untrusted)

    def configbool(self, section, name, default=uimod._unset, untrusted=True):
        """Read a boolean config value from the repo's ui."""
        ui = self.repo.ui
        return ui.configbool(section, name, default, untrusted=untrusted)

    def configint(self, section, name, default=uimod._unset, untrusted=True):
        """Read an integer config value from the repo's ui."""
        ui = self.repo.ui
        return ui.configint(section, name, default, untrusted=untrusted)

    def configlist(self, section, name, default=uimod._unset, untrusted=True):
        """Read a list config value from the repo's ui."""
        ui = self.repo.ui
        return ui.configlist(section, name, default, untrusted=untrusted)

    def archivelist(self, nodeid):
        """Yield one dict per archive type that is enabled for download.

        A type is enabled when it is listed in ``web.allow_archive`` or when
        its ``web.allow<type>`` boolean is set. Each dict has the keys
        ``type``, ``extension`` and ``node``.
        """
        allowed = self.configlist('web', 'allow_archive')
        for typ, spec in self.archivespecs.iteritems():
            enabled = typ in allowed or self.configbool('web',
                                                        'allow%s' % typ)
            if not enabled:
                continue
            yield {'type': typ, 'extension': spec[2], 'node': nodeid}

    def templater(self, req):
        """Build the templater used to render the response for ``req``.

        As a side effect, ``self.reponame`` is filled in when it is not
        already set.
        """
        # Values needed by the templates to build links.
        logourl = self.config('web', 'logourl')
        logoimg = self.config('web', 'logoimg')
        staticurl = self.config('web', 'staticurl')
        if not staticurl:
            staticurl = req.apppath + '/static/'
        if not staticurl.endswith('/'):
            staticurl += '/'

        # Helper exposed to the templates.
        def motd(**unused):
            yield self.config('web', 'motd')

        # Figure out which style to use. The style is only carried in the
        # session variables when it is the one requested via the query
        # string (the first candidate).
        sessionstyle = {}
        styles, (style, mapfile) = getstyle(req, self.config,
                                            self.templatepath)
        if style == styles[0]:
            sessionstyle['style'] = style

        sessionvars = webutil.sessionvars(sessionstyle, '?')

        if not self.reponame:
            self.reponame = (self.config('web', 'name', '')
                             or req.reponame
                             or req.apppath
                             or self.repo.root)

        def websubfilter(text):
            return templatefilters.websub(text, self.websubtable)

        # Create the templater.
        # TODO: export all keywords: defaults = templatekw.keywords.copy()
        defaults = {
            'url': req.apppath + '/',
            'logourl': logourl,
            'logoimg': logoimg,
            'staticurl': staticurl,
            'urlbase': req.advertisedbaseurl,
            'repo': self.reponame,
            'encoding': encoding.encoding,
            'motd': motd,
            'sessionvars': sessionvars,
            'pathdef': makebreadcrumb(req.apppath),
            'style': style,
            'nonce': self.nonce,
        }
        tres = formatter.templateresources(self.repo.ui, self.repo)
        return templater.templater.frommapfile(
            mapfile,
            filters={'websub': websubfilter},
            defaults=defaults,
            resources=tres)
201 200
202 201
203 202 class hgweb(object):
204 203 """HTTP server for individual repositories.
205 204
206 205 Instances of this class serve HTTP responses for a particular
207 206 repository.
208 207
209 208 Instances are typically used as WSGI applications.
210 209
211 210 Some servers are multi-threaded. On these servers, there may
212 211 be multiple active threads inside __call__.
213 212 """
214 213 def __init__(self, repo, name=None, baseui=None):
215 214 if isinstance(repo, str):
216 215 if baseui:
217 216 u = baseui.copy()
218 217 else:
219 218 u = uimod.ui.load()
220 219 r = hg.repository(u, repo)
221 220 else:
222 221 # we trust caller to give us a private copy
223 222 r = repo
224 223
225 224 r.ui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
226 225 r.baseui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
227 226 r.ui.setconfig('ui', 'nontty', 'true', 'hgweb')
228 227 r.baseui.setconfig('ui', 'nontty', 'true', 'hgweb')
229 228 # resolve file patterns relative to repo root
230 229 r.ui.setconfig('ui', 'forcecwd', r.root, 'hgweb')
231 230 r.baseui.setconfig('ui', 'forcecwd', r.root, 'hgweb')
232 231 # displaying a bundling progress bar while serving feels wrong and
233 232 # may break some WSGI implementations.
234 233 r.ui.setconfig('progress', 'disable', 'true', 'hgweb')
235 234 r.baseui.setconfig('progress', 'disable', 'true', 'hgweb')
236 235 self._repos = [hg.cachedlocalrepo(self._webifyrepo(r))]
237 236 self._lastrepo = self._repos[0]
238 237 hook.redirect(True)
239 238 self.reponame = name
240 239
241 240 def _webifyrepo(self, repo):
242 241 repo = getwebview(repo)
243 242 self.websubtable = webutil.getwebsubs(repo)
244 243 return repo
245 244
246 245 @contextlib.contextmanager
247 246 def _obtainrepo(self):
248 247 """Obtain a repo unique to the caller.
249 248
250 249 Internally we maintain a stack of cachedlocalrepo instances
251 250 to be handed out. If one is available, we pop it and return it,
252 251 ensuring it is up to date in the process. If one is not available,
253 252 we clone the most recently used repo instance and return it.
254 253
255 254 It is currently possible for the stack to grow without bounds
256 255 if the server allows infinite threads. However, servers should
257 256 have a thread limit, thus establishing our limit.
258 257 """
259 258 if self._repos:
260 259 cached = self._repos.pop()
261 260 r, created = cached.fetch()
262 261 else:
263 262 cached = self._lastrepo.copy()
264 263 r, created = cached.fetch()
265 264 if created:
266 265 r = self._webifyrepo(r)
267 266
268 267 self._lastrepo = cached
269 268 self.mtime = cached.mtime
270 269 try:
271 270 yield r
272 271 finally:
273 272 self._repos.append(cached)
274 273
275 274 def run(self):
276 275 """Start a server from CGI environment.
277 276
278 277 Modern servers should be using WSGI and should avoid this
279 278 method, if possible.
280 279 """
281 280 if not encoding.environ.get('GATEWAY_INTERFACE',
282 281 '').startswith("CGI/1."):
283 282 raise RuntimeError("This function is only intended to be "
284 283 "called while running as a CGI script.")
285 284 wsgicgi.launch(self)
286 285
287 286 def __call__(self, env, respond):
288 287 """Run the WSGI application.
289 288
290 289 This may be called by multiple threads.
291 290 """
292 291 req = requestmod.wsgirequest(env, respond)
293 292 return self.run_wsgi(req)
294 293
295 294 def run_wsgi(self, wsgireq):
296 295 """Internal method to run the WSGI application.
297 296
298 297 This is typically only called by Mercurial. External consumers
299 298 should be using instances of this class as the WSGI application.
300 299 """
301 300 with self._obtainrepo() as repo:
302 301 profile = repo.ui.configbool('profiling', 'enabled')
303 302 with profiling.profile(repo.ui, enabled=profile):
304 303 for r in self._runwsgi(wsgireq, repo):
305 304 yield r
306 305
307 306 def _runwsgi(self, wsgireq, repo):
308 307 req = wsgireq.req
309 308 res = wsgireq.res
310 309 rctx = requestcontext(self, repo, req, res)
311 310
312 311 # This state is global across all threads.
313 312 encoding.encoding = rctx.config('web', 'encoding')
314 313 rctx.repo.ui.environ = wsgireq.env
315 314
316 315 if rctx.csp:
317 316 # hgwebdir may have added CSP header. Since we generate our own,
318 317 # replace it.
319 318 wsgireq.headers = [h for h in wsgireq.headers
320 319 if h[0] != 'Content-Security-Policy']
321 320 wsgireq.headers.append(('Content-Security-Policy', rctx.csp))
322 321 res.headers['Content-Security-Policy'] = rctx.csp
323 322
324 323 handled = wireprotoserver.handlewsgirequest(
325 324 rctx, req, res, self.check_perm)
326 325 if handled:
327 326 return res.sendresponse()
328 327
329 328 if req.havepathinfo:
330 329 query = req.dispatchpath
331 330 else:
332 331 query = req.querystring.partition('&')[0].partition(';')[0]
333 332
334 333 # translate user-visible url structure to internal structure
335 334
336 335 args = query.split('/', 2)
337 336 if 'cmd' not in req.qsparams and args and args[0]:
338 337 cmd = args.pop(0)
339 338 style = cmd.rfind('-')
340 339 if style != -1:
341 340 req.qsparams['style'] = cmd[:style]
342 341 cmd = cmd[style + 1:]
343 342
344 343 # avoid accepting e.g. style parameter as command
345 344 if util.safehasattr(webcommands, cmd):
346 345 req.qsparams['cmd'] = cmd
347 346
348 347 if cmd == 'static':
349 348 req.qsparams['file'] = '/'.join(args)
350 349 else:
351 350 if args and args[0]:
352 351 node = args.pop(0).replace('%2F', '/')
353 352 req.qsparams['node'] = node
354 353 if args:
355 354 if 'file' in req.qsparams:
356 355 del req.qsparams['file']
357 356 for a in args:
358 357 req.qsparams.add('file', a)
359 358
360 359 ua = req.headers.get('User-Agent', '')
361 360 if cmd == 'rev' and 'mercurial' in ua:
362 361 req.qsparams['style'] = 'raw'
363 362
364 363 if cmd == 'archive':
365 364 fn = req.qsparams['node']
366 365 for type_, spec in rctx.archivespecs.iteritems():
367 366 ext = spec[2]
368 367 if fn.endswith(ext):
369 368 req.qsparams['node'] = fn[:-len(ext)]
370 369 req.qsparams['type'] = type_
371 370 else:
372 371 cmd = req.qsparams.get('cmd', '')
373 372
374 373 # process the web interface request
375 374
376 375 try:
377 376 tmpl = rctx.templater(req)
378 377 ctype = tmpl('mimetype', encoding=encoding.encoding)
379 378 ctype = templater.stringify(ctype)
380 379
381 380 # check read permissions non-static content
382 381 if cmd != 'static':
383 382 self.check_perm(rctx, req, None)
384 383
385 384 if cmd == '':
386 385 req.qsparams['cmd'] = tmpl.cache['default']
387 386 cmd = req.qsparams['cmd']
388 387
389 388 # Don't enable caching if using a CSP nonce because then it wouldn't
390 389 # be a nonce.
391 390 if rctx.configbool('web', 'cache') and not rctx.nonce:
392 391 tag = 'W/"%d"' % self.mtime
393 392 if req.headers.get('If-None-Match') == tag:
394 raise ErrorResponse(HTTP_NOT_MODIFIED)
393 res.status = '304 Not Modified'
394 # Response body not allowed on 304.
395 res.setbodybytes('')
396 return res.sendresponse()
395 397
396 398 wsgireq.headers.append((r'ETag', pycompat.sysstr(tag)))
397 399 res.headers['ETag'] = tag
398 400
399 401 if cmd not in webcommands.__all__:
400 402 msg = 'no such method: %s' % cmd
401 403 raise ErrorResponse(HTTP_BAD_REQUEST, msg)
402 404 else:
403 405 # Set some globals appropriate for web handlers. Commands can
404 406 # override easily enough.
405 407 res.status = '200 Script output follows'
406 408 res.headers['Content-Type'] = ctype
407 409 content = getattr(webcommands, cmd)(rctx, wsgireq, tmpl)
408 410
409 411 if content is res:
410 412 return res.sendresponse()
411 413 elif content is True:
412 414 return []
413 415 else:
414 416 wsgireq.respond(HTTP_OK, ctype)
415 417 return content
416 418
417 419 except (error.LookupError, error.RepoLookupError) as err:
418 420 wsgireq.respond(HTTP_NOT_FOUND, ctype)
419 421 msg = pycompat.bytestr(err)
420 422 if (util.safehasattr(err, 'name') and
421 423 not isinstance(err, error.ManifestLookupError)):
422 424 msg = 'revision not found: %s' % err.name
423 425 return tmpl('error', error=msg)
424 426 except (error.RepoError, error.RevlogError) as inst:
425 427 wsgireq.respond(HTTP_SERVER_ERROR, ctype)
426 428 return tmpl('error', error=pycompat.bytestr(inst))
427 429 except ErrorResponse as inst:
428 430 wsgireq.respond(inst, ctype)
429 if inst.code == HTTP_NOT_MODIFIED:
430 # Not allowed to return a body on a 304
431 return ['']
432 431 return tmpl('error', error=pycompat.bytestr(inst))
433 432
434 433 def check_perm(self, rctx, req, op):
435 434 for permhook in permhooks:
436 435 permhook(rctx, req, op)
437 436
def getwebview(repo):
    """The 'web.view' config controls changeset filter to hgweb. Possible
    values are ``served``, ``visible`` and ``all``. Default is ``served``.
    The ``served`` filter only shows changesets that can be pulled from the
    hgweb instance. The ``visible`` filter includes secret changesets but
    still excludes "hidden" ones.

    See the repoview module for details.

    The option has been around undocumented since Mercurial 2.5, but no
    user ever asked about it. So we better keep it undocumented for now."""
    # experimental config: web.view
    viewconfig = repo.ui.config('web', 'view', untrusted=True)
    if viewconfig == 'all':
        return repo.unfiltered()

    # Any value that does not name a known filter falls back to 'served'.
    if viewconfig in repoview.filtertable:
        filtername = viewconfig
    else:
        filtername = 'served'
    return repo.filtered(filtername)
@@ -1,627 +1,651 b''
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import socket
13 13 import wsgiref.headers as wsgiheaders
14 14 #import wsgiref.validate
15 15
16 16 from .common import (
17 17 ErrorResponse,
18 HTTP_NOT_MODIFIED,
19 18 statusmessage,
20 19 )
21 20
22 21 from ..thirdparty import (
23 22 attr,
24 23 )
25 24 from .. import (
26 25 error,
27 26 pycompat,
28 27 util,
29 28 )
30 29
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.
    """
    def __init__(self):
        # Stores (key, value) 2-tuples in insertion order. This isn't the
        # most efficient, but we don't rely on parameters that much, so it
        # shouldn't be a perf issue. We can always add a dict for fast
        # lookups.
        self._items = []

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key has no values.
        """
        for k, v in reversed(self._items):
            if k == key:
                return v

        raise KeyError(key)

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        # Drop any existing entries directly rather than calling
        # ``del self[key]`` and swallowing the KeyError for a new key.
        self._items[:] = [(k, v) for k, v in self._items if k != key]
        self._items.append((key, value))

    def __delitem__(self, key):
        """Delete all values for a key.

        Raises KeyError if the key has no values.
        """
        remaining = [(k, v) for k, v in self._items if k != key]
        if len(remaining) == len(self._items):
            raise KeyError(key)

        # Mutate in place so the list object itself is preserved.
        self._items[:] = remaining

    def __contains__(self, key):
        return any(k == key for k, v in self._items)

    def __len__(self):
        # Counts stored (key, value) pairs, not distinct keys.
        return len(self._items)

    def get(self, key, default=None):
        """Return the last set value for a key, or ``default`` if unset."""
        try:
            return self[key]
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.append((key, value))

    def getall(self, key):
        """Obtains all values for a key, in insertion order."""
        return [v for k, v in self._items if k == key]

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self.getall(key)

        if not vals:
            raise KeyError(key)

        if len(vals) > 1:
            raise KeyError('multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Return a plain dict mapping each key to the list of its values."""
        d = {}
        for k, v in self._items:
            d.setdefault(k, []).append(v)

        return d
114 113
@attr.s(frozen=True)
class parsedrequest(object):
    """Immutable record describing a parsed WSGI request.

    Carries the parsed URL components, query parameters and headers, plus a
    handle on the request body input stream.
    """

    # HTTP request method.
    method = attr.ib()
    # Complete URL of this request.
    url = attr.ib()
    # URL with no path components: just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised counterparts of ``url`` and ``baseurl``. They use
    # SERVER_NAME for the hostname instead of the HTTP Host header. This is
    # likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # URL scheme (the part before ``://``), e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # REMOTE_USER value if set, else None.
    remoteuser = attr.ib()
    # REMOTE_HOST value if set, else None.
    remotehost = attr.ib()
    # Path of the WSGI application.
    apppath = attr.ib()
    # Path parts used for dispatch, as a list.
    dispatchparts = attr.ib()
    # URL path component (without query string) used for dispatch.
    dispatchpath = attr.ib()
    # Whether the request has a path component. This can be true even when
    # ``dispatchpath`` is empty, due to REPO_NAME muckery.
    havepathinfo = attr.ib()
    # Name of the repository being accessed.
    reponame = attr.ib()
    # Raw query string (the part after "?" in the URL).
    querystring = attr.ib()
    # Query string parameters, as a multidict.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance. Behaves like a dict with case
    # insensitive keys.
    headers = attr.ib()
    # Input stream for the request body.
    bodyfh = attr.ib()
158 157
def parserequestfromenv(env, bodyfh):
    """Build a ``parsedrequest`` from a WSGI environment.

    WSGI describes a request through environment variables. This function
    turns those variables into a structured object. ``bodyfh`` is stored on
    the result unchanged as the request body stream.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.iteritems()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.iteritems()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'
    advertisedfullurl = fullurl

    def addport(s):
        # Only spell out the port when it isn't the scheme's default.
        if env['wsgi.url_scheme'] == 'https':
            defaultport = '443'
        else:
            defaultport = '80'
        if env['SERVER_PORT'] != defaultport:
            s += ':' + env['SERVER_PORT']

        return s

    # The Host header is used verbatim when present; otherwise the host is
    # derived from SERVER_NAME and SERVER_PORT.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    quotedscript = util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    quotedpath = util.urlreq.quote(env.get('PATH_INFO', ''))
    fullurl += quotedscript + quotedpath
    advertisedfullurl += quotedscript + quotedpath

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # When dispatching requests, we look at the URL components (PATH_INFO
    # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
    # has the concept of "virtual" repositories. This is defined via REPO_NAME.
    # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
    # root. We also exclude its path components from PATH_INFO when resolving
    # the dispatch path.

    apppath = env['SCRIPT_NAME']

    if env.get('REPO_NAME'):
        if not apppath.endswith('/'):
            apppath += '/'

        apppath += env.get('REPO_NAME')

    if 'PATH_INFO' in env:
        dispatchparts = env['PATH_INFO'].strip('/').split('/')

        # Strip out the leading repo parts, if present.
        repoparts = env.get('REPO_NAME', '').split('/')
        nrepoparts = len(repoparts)
        if dispatchparts[:nrepoparts] == repoparts:
            dispatchparts = dispatchparts[nrepoparts:]
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    querystring = env.get('QUERY_STRING', '')

    # A multidict keeps every (key, value) pair in the order it appeared in
    # the query string.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    prefixlen = len('HTTP_')
    headers = wsgiheaders.Headers(
        [(k[prefixlen:].replace('_', '-'), v)
         for k, v in env.iteritems()
         if k.startswith('HTTP_')])

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    # TODO do this once we remove wsgirequest.inp, otherwise we could have
    # multiple readers from the underlying input stream.
    #bodyfh = env['wsgi.input']
    #if 'Content-Length' in headers:
    #    bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=env['wsgi.url_scheme'],
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts,
                         dispatchpath=dispatchpath,
                         havepathinfo='PATH_INFO' in env,
                         reponame=env.get('REPO_NAME'),
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh)
292 291
class offsettrackingwriter(object):
    """A file object like object that is append only and tracks write count.

    Instances are bound to a callable. This callable is called with data
    whenever a ``write()`` is attempted.

    Instances track the amount of written data so they can answer ``tell()``
    requests.

    The intent of this class is to wrap the ``write()`` function returned by
    a WSGI ``start_response()`` function. Since ``write()`` is a callable and
    not a file object, it doesn't implement other file object methods.
    """
    def __init__(self, writefn):
        self._write = writefn
        self._offset = 0

    def write(self, s):
        """Pass ``s`` to the wrapped callable and advance the offset.

        Returns the number of bytes counted for this call, mirroring the
        return value of file object ``write()`` methods.
        """
        res = self._write(s)
        # Some Python objects don't report the number of bytes written. In
        # that case assume everything was written.
        written = len(s) if res is None else res
        self._offset += written
        return written

    def flush(self):
        """No-op; nothing is buffered by this wrapper."""
        pass

    def tell(self):
        """Return the total number of bytes counted so far."""
        return self._offset
323 322
324 323 class wsgiresponse(object):
325 324 """Represents a response to a WSGI request.
326 325
327 326 A response consists of a status line, headers, and a body.
328 327
329 328 Consumers must populate the ``status`` and ``headers`` fields and
330 329 make a call to a ``setbody*()`` method before the response can be
331 330 issued.
332 331
333 332 When it is time to start sending the response over the wire,
334 333 ``sendresponse()`` is called. It handles emitting the header portion
335 334 of the response message. It then yields chunks of body data to be
336 335 written to the peer. Typically, the WSGI application itself calls
337 336 and returns the value from ``sendresponse()``.
338 337 """
339 338
340 339 def __init__(self, req, startresponse):
341 340 """Create an empty response tied to a specific request.
342 341
343 342 ``req`` is a ``parsedrequest``. ``startresponse`` is the
344 343 ``start_response`` function passed to the WSGI application.
345 344 """
346 345 self._req = req
347 346 self._startresponse = startresponse
348 347
349 348 self.status = None
350 349 self.headers = wsgiheaders.Headers([])
351 350
352 351 self._bodybytes = None
353 352 self._bodygen = None
354 353 self._bodywillwrite = False
355 354 self._started = False
356 355 self._bodywritefn = None
357 356
358 357 def _verifybody(self):
359 358 if (self._bodybytes is not None or self._bodygen is not None
360 359 or self._bodywillwrite):
361 360 raise error.ProgrammingError('cannot define body multiple times')
362 361
363 362 def setbodybytes(self, b):
364 """Define the response body as static bytes."""
363 """Define the response body as static bytes.
364
365 The empty string signals that there is no response body.
366 """
365 367 self._verifybody()
366 368 self._bodybytes = b
367 369 self.headers['Content-Length'] = '%d' % len(b)
368 370
369 371 def setbodygen(self, gen):
370 372 """Define the response body as a generator of bytes."""
371 373 self._verifybody()
372 374 self._bodygen = gen
373 375
374 376 def setbodywillwrite(self):
375 377 """Signal an intent to use write() to emit the response body.
376 378
377 379 **This is the least preferred way to send a body.**
378 380
379 381 It is preferred for WSGI applications to emit a generator of chunks
380 382 constituting the response body. However, some consumers can't emit
381 383 data this way. So, WSGI provides a way to obtain a ``write(data)``
382 384 function that can be used to synchronously perform an unbuffered
383 385 write.
384 386
385 387 Calling this function signals an intent to produce the body in this
386 388 manner.
387 389 """
388 390 self._verifybody()
389 391 self._bodywillwrite = True
390 392
def sendresponse(self):
    """Send the generated response to the client.

    Before this is called, ``status`` must be set and one of
    ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
    have been called.

    This method is a generator: nothing below runs (including the
    validation) until the returned generator is advanced. It yields the
    chunks of the response body. When ``setbodywillwrite()`` was used it
    yields nothing and instead stores the ``write()`` callable returned
    by the WSGI ``start_response`` function for ``getbodyfile()``.

    Calling this method multiple times is not allowed.

    Raises ``error.ProgrammingError`` if the response was already
    started, if the status line or the body is undefined, or if a 304
    response carries a disallowed header or a non-bytes body.
    """
    if self._started:
        raise error.ProgrammingError('sendresponse() called multiple times')

    self._started = True

    if not self.status:
        raise error.ProgrammingError('status line not defined')

    if (self._bodybytes is None and self._bodygen is None
        and not self._bodywillwrite):
        raise error.ProgrammingError('response body not defined')

    # RFC 7232 Section 4.1 states that a 304 MUST generate one of
    # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
    # and SHOULD NOT generate other headers unless they could be used
    # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
    # states that no response body can be issued. Content-Length can
    # be sent. But if it is present, it should be the size of the response
    # that wasn't transferred.
    if self.status.startswith('304 '):
        # setbodybytes('') will set C-L to 0. This doesn't conform with the
        # spec. So remove it.
        if self.headers.get('Content-Length') == '0':
            del self.headers['Content-Length']

        # Strictly speaking, this is too strict. But until it causes
        # problems, let's be strict.
        badheaders = {k for k in self.headers.keys()
                      if k.lower() not in ('date', 'etag', 'expires',
                                           'cache-control',
                                           'content-location',
                                           'vary')}
        if badheaders:
            raise error.ProgrammingError(
                'illegal header on 304 response: %s' %
                ', '.join(sorted(badheaders)))

        if self._bodygen is not None or self._bodywillwrite:
            raise error.ProgrammingError("must use setbodybytes('') with "
                                         "304 responses")

    # Various HTTP clients (notably httplib) won't read the HTTP response
    # until the HTTP request has been sent in full. If servers (us) send a
    # response before the HTTP request has been fully sent, the connection
    # may deadlock because neither end is reading.
    #
    # We work around this by "draining" the request data before
    # sending any response in some conditions.
    drain = False
    close = False

    # If the client sent Expect: 100-continue, we assume it is smart enough
    # to deal with the server sending a response before reading the request.
    # (httplib doesn't do this.)
    if self._req.headers.get('Expect', '').lower() == '100-continue':
        pass
    # Only tend to request methods that have bodies. Strictly speaking,
    # we should sniff for a body. But this is fine for our existing
    # WSGI applications.
    elif self._req.method not in ('POST', 'PUT'):
        pass
    else:
        # If we don't know how much data to read, there's no guarantee
        # that we can drain the request responsibly. The WSGI
        # specification only says that servers *should* ensure the
        # input stream doesn't overrun the actual request. So there's
        # no guarantee that reading until EOF won't corrupt the stream
        # state.
        if not isinstance(self._req.bodyfh, util.cappedreader):
            close = True
        else:
            # We /could/ only drain certain HTTP response codes. But 200 and
            # non-200 wire protocol responses both require draining. Since
            # we have a capped reader in place for all situations where we
            # drain, it is safe to read from that stream. We'll either do
            # a drain or no-op if we're already at EOF.
            drain = True

    if close:
        self.headers['Connection'] = 'Close'

    if drain:
        assert isinstance(self._req.bodyfh, util.cappedreader)
        # Read and discard the remaining request body in 32 KiB chunks.
        while True:
            chunk = self._req.bodyfh.read(32768)
            if not chunk:
                break

    write = self._startresponse(pycompat.sysstr(self.status),
                                self.headers.items())

    if self._bodybytes:
        yield self._bodybytes
    elif self._bodygen:
        for chunk in self._bodygen:
            yield chunk
    elif self._bodywillwrite:
        self._bodywritefn = write
    elif self._bodybytes is None and self._bodygen is None:
        # No body source of any kind is defined. An empty (falsy) body,
        # e.g. setbodybytes('') on a 304, also falls through the branches
        # above but is legitimate and simply emits nothing, so it must not
        # be treated as an error here.
        raise error.ProgrammingError('do not know how to send body')
470 501
def getbodyfile(self):
    """Return a file-like object that writes to the response body.

    ``setbodywillwrite()`` and then ``sendresponse()`` must be called
    before this. ``sendresponse()`` is a generator, so it does not run to
    completion unless the generator is advanced. In this mode the
    generator yields no items; the easiest way to consume it is
    ``list(res.sendresponse())``, which should resolve to an empty
    list - ``[]``.

    Raises ``error.ProgrammingError`` if either prerequisite call has not
    been made.
    """
    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    if not self._started:
        msg = ('must call sendresponse() first; did you remember to '
               'consume it since it is a generator?')
        raise error.ProgrammingError(msg)

    writefn = self._bodywritefn
    assert writefn
    return offsettrackingwriter(writefn)
491 522
class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """
    def __init__(self, wsgienv, start_response):
        """Wrap a WSGI environ dict and its ``start_response`` callable.

        Raises ``RuntimeError`` when ``wsgi.version`` is not a 1.x version.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)

        inp = wsgienv[r'wsgi.input']

        # When the request declares its body size, bound reads on the input
        # stream to that size. respond() relies on this wrapper type to
        # decide whether draining the request body is safe.
        if r'HTTP_CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
        elif r'CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))

        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.req = parserequestfromenv(wsgienv, inp)
        self.res = wsgiresponse(self.req, start_response)
        # Set to None by respond() once the response has been started.
        self._start_response = start_response
        # write() callable returned by start_response; set by respond().
        self.server_write = None
        # Pending (name, value) response headers.
        self.headers = []

    def respond(self, status, type, filename=None, body=None):
        """Start the WSGI response and optionally write ``body``.

        ``status`` may be an ``ErrorResponse``, an integer status code or
        an already formatted status line. ``type`` is the Content-Type
        value. A truthy ``filename`` adds an inline Content-Disposition
        header built from its last path component. When ``body`` is not
        None, a Content-Length header is added and the body is written
        once the response has been started.

        Headers are assembled and ``start_response`` is invoked only while
        the response has not been started yet; later calls just write
        ``body``.

        Raises ``TypeError`` if a pending header value is not a ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                # Keep only the basename and escape backslashes and quotes
                # so the value is safe inside the quoted header parameter.
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            # Various HTTP clients (notably httplib) won't read the HTTP
            # response until the HTTP request has been sent in full. If servers
            # (us) send a response before the HTTP request has been fully sent,
            # the connection may deadlock because neither end is reading.
            #
            # We work around this by "draining" the request data before
            # sending any response in some conditions.
            drain = False
            close = False

            # If the client sent Expect: 100-continue, we assume it is smart
            # enough to deal with the server sending a response before reading
            # the request. (httplib doesn't do this.)
            if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
                pass
            # Only tend to request methods that have bodies. Strictly speaking,
            # we should sniff for a body. But this is fine for our existing
            # WSGI applications.
            elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
                pass
            else:
                # If we don't know how much data to read, there's no guarantee
                # that we can drain the request responsibly. The WSGI
                # specification only says that servers *should* ensure the
                # input stream doesn't overrun the actual request. So there's
                # no guarantee that reading until EOF won't corrupt the stream
                # state.
                if not isinstance(self.req.bodyfh, util.cappedreader):
                    close = True
                else:
                    # We /could/ only drain certain HTTP response codes. But 200
                    # and non-200 wire protocol responses both require draining.
                    # Since we have a capped reader in place for all situations
                    # where we drain, it is safe to read from that stream. We'll
                    # either do a drain or no-op if we're already at EOF.
                    drain = True

            if close:
                self.headers.append((r'Connection', r'Close'))

            if drain:
                assert isinstance(self.req.bodyfh, util.cappedreader)
                # Read and discard the rest of the request body.
                while True:
                    chunk = self.req.bodyfh.read(32768)
                    if not chunk:
                        break

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            self._start_response = None
            self.headers = []
        if body is not None:
            self.write(body)
            self.server_write = None

    def write(self, thing):
        """Write ``thing`` to the client; falsy values are ignored.

        A connection reset by the peer is silently swallowed; any other
        ``socket.error`` propagates.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Use the ``errno`` attribute rather than indexing the
                # exception: exception objects are not subscriptable on
                # Python 3, and indexing fails on an argument-less error.
                if inst.errno != errno.ECONNRESET:
                    raise

    def flush(self):
        """No-op kept for file-like API compatibility; returns None."""
        return None
620 644
def wsgiapplication(app_maker):
    '''Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts. A plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application.

    ``app_maker`` is called once, immediately; the returned function
    forwards each ``(env, respond)`` call to the application it produced.
    '''
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        return wsgiapp(env, respond)

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now