request.py
637 lines
| 22.1 KiB
| text/x-python
|
PythonLexer
Eric Hopper
|
r2391 | # hgweb/request.py - An http request from either CGI or the standalone server. | ||
Eric Hopper
|
r2355 | # | ||
# Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net> | ||||
Raphaël Gomès
|
r47575 | # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com> | ||
Eric Hopper
|
r2355 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Eric Hopper
|
r2355 | |||
Matt Harbison
|
r52756 | from __future__ import annotations | ||
Yuya Nishihara
|
r27046 | |||
Augie Fackler
|
r43346 | # import wsgiref.validate | ||
Yuya Nishihara
|
r27046 | |||
Matt Harbison
|
r52622 | import typing | ||
Augie Fackler
|
r43346 | from ..thirdparty import attr | ||
Matt Harbison
|
r52622 | |||
# Force pytype to use the non-vendored package | ||||
if typing.TYPE_CHECKING: | ||||
# noinspection PyPackageRequirements | ||||
import attr | ||||
Yuya Nishihara
|
r27046 | from .. import ( | ||
Gregory Szorc
|
r36877 | error, | ||
Augie Fackler
|
r34515 | pycompat, | ||
Yuya Nishihara
|
r27046 | util, | ||
) | ||||
r47669 | from ..utils import ( | |||
urlutil, | ||||
) | ||||
Eric Hopper
|
r2355 | |||
Augie Fackler
|
r43346 | |||
Gregory Szorc
|
class multidict:
    """Mapping-like container that remembers every value stored under a key.

    Holds parsed request parameters, where one name may appear several times.

    The design is borrowed from WebOb's class of the same name.
    """

    def __init__(self):
        # Maps each key to the list of values recorded for it, in the order
        # they were added.
        self._items = {}

    def __getitem__(self, key):
        """Return the value most recently stored for ``key``."""
        values = self._items[key]
        return values[-1]

    def __setitem__(self, key, value):
        """Drop any values held for ``key`` and keep only ``value``."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Remove ``key`` along with all of its values."""
        del self._items[key]

    def __contains__(self, key):
        return key in self._items

    def __len__(self):
        return len(self._items)

    def get(self, key, default=None):
        """Return the last value for ``key``, or ``default`` if it is unset."""
        try:
            latest = self.__getitem__(key)
        except KeyError:
            return default
        return latest

    def add(self, key, value):
        """Append ``value`` to ``key`` while keeping the values already set."""
        bucket = self._items.setdefault(key, [])
        bucket.append(value)

    def getall(self, key):
        """Return every value stored for ``key`` (empty list if unset)."""
        if key in self._items:
            return self._items[key]
        return []

    def getone(self, key):
        """Return the sole value stored for ``key``.

        Raises KeyError when the key is not defined or when more than one
        value has been set for it.
        """
        values = self._items[key]

        if len(values) <= 1:
            return values[0]

        raise KeyError(b'multiple values for %r' % key)

    def asdictoflists(self):
        """Return a plain dict mapping each key to a copy of its value list."""
        snapshot = {}
        for key, values in self._items.items():
            snapshot[key] = list(values)
        return snapshot
Gregory Szorc
|
r36878 | |||
Augie Fackler
|
r43346 | |||
Gregory Szorc
|
@attr.s(frozen=True)
class parsedrequest:
    """Immutable record describing a parsed WSGI request.

    Bundles the parsed request parameters together with a handle on the
    request's input stream.
    """

    # HTTP request method.
    method = attr.ib()
    # Complete URL of the request.
    url = attr.ib()
    # URL stripped of all path components: just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised counterparts of ``url`` and ``baseurl``. They are built from
    # SERVER_NAME rather than the HTTP Host header, which is likely what
    # clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # Scheme of the URL (the part before ``://``), e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # REMOTE_USER value if set, otherwise None.
    remoteuser = attr.ib()
    # REMOTE_HOST value if set, otherwise None.
    remotehost = attr.ib()
    # Relative path of the WSGI application. Begins with ``/`` when defined.
    apppath = attr.ib()
    # Path components used for dispatch, as a list.
    dispatchparts = attr.ib()
    # Path portion of the URL (without query string) used for dispatch.
    # ``None`` means the request carried no path component, an empty string
    # means the application's root URL was requested, and any other value is
    # the requested path under the application, not beginning with ``/``.
    dispatchpath = attr.ib()
    # Name of the repository being accessed.
    reponame = attr.ib()
    # Unparsed query string (the part after "?" in the URL).
    querystring = attr.ib()
    # Query string parameters as a multidict.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance; behaves like a dict whose keys are
    # case insensitive.
    headers = attr.ib()
    # Input stream for the request body.
    bodyfh = attr.ib()
    # The WSGI environment dict, unmodified.
    rawenv = attr.ib()
Gregory Szorc
|
r36824 | |||
Augie Fackler
|
r43346 | |||
Gregory Szorc
|
def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
    """Build a ``parsedrequest`` from a WSGI environment dict.

    WSGI describes a request through environment variables. This function
    turns those variables into a structured object.

    When ``reponame`` is given, the leading path components matching it are
    effectively moved from ``PATH_INFO`` to ``SCRIPT_NAME``, simulating the
    view of a WSGI application serving requests from the repo's base URL.

    When ``altbaseurl`` is given (typically from the ``web.baseurl`` config
    option), it replaces the WSGI environment variables when building URL
    components up to and including the WSGI application path. For example,
    with the application at ``/repo``, a request to ``/rev/@`` and this
    argument set to ``http://myserver:9000/prefix``, the URL and path
    components resolve as if the request were to
    ``http://myserver:9000/prefix/rev/@``. Put differently,
    ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT`` and ``SCRIPT_NAME``
    are all effectively replaced by components of this URL.

    ``bodyfh`` may supply the file object the request body is read from.
    When omitted, ``wsgi.input`` from the environment dict is used.

    Raises ``error.ProgrammingError`` if ``reponame`` is in effect and
    ``PATH_INFO`` is missing, does not start with the repo name, or the repo
    name does not end at a path delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # Ideally the incoming object would first be validated against the WSGI
    # spec, since only spec-conforming implementations should be handled.
    # TODO enable this once we fix internal violations.
    # wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings.
    # On Python 3 the code points of those strings must lie between
    # \00000-\000FF. Mercurial works with bytes, so convert every string key
    # and value to bytes up front.
    def tobytes(s):
        return s.encode('iso8859-1') if isinstance(s, str) else s

    env = dict((tobytes(key), tobytes(value)) for key, value in env.items())

    # Some hosting solutions emulate hgwebdir and dispatch straight to an
    # hgweb instance through this environment variable. It was always
    # consulted prior to d7fd203e36cc; keep doing so to avoid breaking them.
    reponame = reponame or env.get(b'REPO_NAME')

    if altbaseurl:
        altbaseurl = urlutil.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    urlscheme = env[b'wsgi.url_scheme']
    fullurl = urlscheme + b'://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + b'://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Only spell out the port when it is not the scheme's default.
        defaultport = b'443' if s.startswith(b'https://') else b'80'
        if port != defaultport:
            s += b':' + port
        return s

    httphost = env.get(b'HTTP_HOST')
    if httphost:
        fullurl += httphost
    else:
        fullurl += env[b'SERVER_NAME']
        fullurl = addport(fullurl, env[b'SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # Prefer the explicit port, then the scheme's default, and finally
        # whatever port the server reports.
        port = altbaseurl.port
        if not port:
            port = {b'http': b'80', b'https': b'443'}.get(altbaseurl.scheme)
            if port is None:
                port = env[b'SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env[b'SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env[b'SERVER_PORT'])

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    scriptname = env.get(b'SCRIPT_NAME', b'')
    pathinfo = env.get(b'PATH_INFO', b'')
    querystring = env.get(b'QUERY_STRING', b'')

    quotedscriptname = util.urlreq.quote(scriptname)
    quotedpathinfo = util.urlreq.quote(pathinfo)

    fullurl += quotedscriptname
    fullurl += quotedpathinfo

    if altbaseurl:
        # The path of the alternate base URL stands in for SCRIPT_NAME.
        altpath = altbaseurl.path or b''
        if altpath and not altpath.startswith(b'/'):
            altpath = b'/' + altpath
        advertisedfullurl += util.urlreq.quote(altpath)
    else:
        advertisedfullurl += quotedscriptname

    advertisedfullurl += quotedpathinfo

    if querystring:
        fullurl += b'?' + querystring
        advertisedfullurl += b'?' + querystring

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if altbaseurl:
        apppath = altpath
    else:
        apppath = scriptname

    if reponame:
        repoprefix = b'/' + reponame.strip(b'/')

        if not pathinfo:
            raise error.ProgrammingError(b'reponame requires PATH_INFO')

        if not pathinfo.startswith(repoprefix):
            raise error.ProgrammingError(
                b'PATH_INFO does not begin with repo '
                b'name: %s (%s)' % (pathinfo, reponame)
            )

        remainder = pathinfo[len(repoprefix) :]

        if remainder and not remainder.startswith(b'/'):
            raise error.ProgrammingError(
                b'reponame prefix of PATH_INFO does '
                b'not end at path delimiter: %s (%s)'
                % (pathinfo, reponame)
            )

        apppath = apppath.rstrip(b'/') + repoprefix
        dispatchparts = remainder.strip(b'/').split(b'/')
        dispatchpath = b'/'.join(dispatchparts)

    elif b'PATH_INFO' in env:
        trimmed = pathinfo.strip(b'/')
        if trimmed:
            dispatchparts = trimmed.split(b'/')
            dispatchpath = b'/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = b''
    else:
        dispatchparts = []
        dispatchpath = None

    # A multidict keeps every value given for a repeated parameter.
    qsparams = multidict()
    for name, value in util.urlreq.parseqsl(
        querystring, keep_blank_values=True
    ):
        qsparams.add(name, value)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    httpprefix = b'HTTP_'
    rawheaders = [
        (key[len(httpprefix) :].replace(b'_', b'-'), value)
        for key, value in env.items()
        if key.startswith(httpprefix)
    ]

    from . import wsgiheaders  # avoid cycle

    headers = wsgiheaders.Headers(rawheaders)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if b'CONTENT_LENGTH' in env and b'HTTP_CONTENT_LENGTH' not in env:
        headers[b'Content-Length'] = env[b'CONTENT_LENGTH']

    if b'CONTENT_TYPE' in env and b'HTTP_CONTENT_TYPE' not in env:
        headers[b'Content-Type'] = env[b'CONTENT_TYPE']

    if bodyfh is None:
        bodyfh = env[b'wsgi.input']
        if b'Content-Length' in headers:
            # Cap reads at the declared body length.
            bodyfh = util.cappedreader(
                bodyfh, int(headers[b'Content-Length'] or b'0')
            )

    return parsedrequest(
        method=env[b'REQUEST_METHOD'],
        url=fullurl,
        baseurl=baseurl,
        advertisedurl=advertisedfullurl,
        advertisedbaseurl=advertisedbaseurl,
        urlscheme=urlscheme,
        remoteuser=env.get(b'REMOTE_USER'),
        remotehost=env.get(b'REMOTE_HOST'),
        apppath=apppath,
        dispatchparts=dispatchparts,
        dispatchpath=dispatchpath,
        reponame=reponame,
        querystring=querystring,
        qsparams=qsparams,
        headers=headers,
        bodyfh=bodyfh,
        rawenv=env,
    )
Gregory Szorc
|
r36824 | |||
Gregory Szorc
|
class offsettrackingwriter:
    """Append-only file-like wrapper that counts what has been written.

    Each instance is bound to a callable, which receives the data whenever
    ``write()`` is attempted.

    The running total of written data is kept so that ``tell()`` requests
    can be answered.

    This class exists to wrap the ``write()`` function returned by a WSGI
    ``start_response()`` function. That ``write()`` is a bare callable rather
    than a file object, so it implements no other file object methods.
    """

    def __init__(self, writefn):
        self._write = writefn
        self._offset = 0

    def write(self, s):
        written = self._write(s)
        # Not every writer reports a byte count; fall back to the size of
        # the data that was handed over.
        self._offset += len(s) if written is None else written

    def flush(self):
        pass

    def tell(self):
        return self._offset
Augie Fackler
|
r43346 | |||
Gregory Szorc
|
class wsgiresponse:
    """Represents a response to a WSGI request.

    A response consists of a status line, headers, and a body.

    Consumers must populate the ``status`` and ``headers`` fields and
    make a call to a ``setbody*()`` method before the response can be
    issued.

    When it is time to start sending the response over the wire,
    ``sendresponse()`` is called. It handles emitting the header portion
    of the response message. It then yields chunks of body data to be
    written to the peer. Typically, the WSGI application itself calls
    and returns the value from ``sendresponse()``.
    """

    def __init__(self, req, startresponse):
        """Create an empty response tied to a specific request.

        ``req`` is a ``parsedrequest``. ``startresponse`` is the
        ``start_response`` function passed to the WSGI application.
        """
        self._req = req
        self._startresponse = startresponse

        self.status = None
        from . import wsgiheaders  # avoid cycle

        self.headers = wsgiheaders.Headers([])

        # At most one of these three describes the body; see _verifybody().
        self._bodybytes = None
        self._bodygen = None
        self._bodywillwrite = False
        self._started = False
        # Set by sendresponse() when the body is produced via write().
        self._bodywritefn = None

    def _verifybody(self):
        """Raise ProgrammingError if a body has already been defined."""
        if (
            self._bodybytes is not None
            or self._bodygen is not None
            or self._bodywillwrite
        ):
            raise error.ProgrammingError(b'cannot define body multiple times')

    def setbodybytes(self, b):
        """Define the response body as static bytes.

        The empty string signals that there is no response body.

        Also sets the ``Content-Length`` header to the length of ``b``.
        """
        self._verifybody()
        self._bodybytes = b
        self.headers[b'Content-Length'] = b'%d' % len(b)

    def setbodygen(self, gen):
        """Define the response body as a generator of bytes."""
        self._verifybody()
        self._bodygen = gen

    def setbodywillwrite(self):
        """Signal an intent to use write() to emit the response body.

        **This is the least preferred way to send a body.**

        It is preferred for WSGI applications to emit a generator of chunks
        constituting the response body. However, some consumers can't emit
        data this way. So, WSGI provides a way to obtain a ``write(data)``
        function that can be used to synchronously perform an unbuffered
        write.

        Calling this function signals an intent to produce the body in this
        manner.
        """
        self._verifybody()
        self._bodywillwrite = True

    def sendresponse(self):
        """Send the generated response to the client.

        Before this is called, ``status`` must be set and one of
        ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
        have been called. The body requirement is waived for ``HEAD``
        requests.

        This is a generator: nothing happens until it is advanced. It yields
        the chunks of the response body.

        Calling this method multiple times is not allowed.

        Raises ``error.ProgrammingError`` when called twice, when the status
        line or body is missing, or when a 304 response carries a disallowed
        header or a non-bytes body.
        """
        if self._started:
            raise error.ProgrammingError(
                b'sendresponse() called multiple times'
            )

        self._started = True

        if not self.status:
            raise error.ProgrammingError(b'status line not defined')

        if (
            self._bodybytes is None
            and self._bodygen is None
            and not self._bodywillwrite
            and self._req.method != b'HEAD'
        ):
            raise error.ProgrammingError(b'response body not defined')

        # RFC 7232 Section 4.1 states that a 304 MUST generate one of
        # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
        # and SHOULD NOT generate other headers unless they could be used
        # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
        # states that no response body can be issued. Content-Length can
        # be sent. But if it is present, it should be the size of the response
        # that wasn't transferred.
        if self.status.startswith(b'304 '):
            # setbodybytes('') will set C-L to 0. This doesn't conform with the
            # spec. So remove it.
            if self.headers.get(b'Content-Length') == b'0':
                del self.headers[b'Content-Length']

            # Strictly speaking, this is too strict. But until it causes
            # problems, let's be strict.
            badheaders = {
                k
                for k in self.headers.keys()
                if k.lower()
                not in (
                    b'date',
                    b'etag',
                    b'expires',
                    b'cache-control',
                    b'content-location',
                    b'content-security-policy',
                    b'vary',
                )
            }
            if badheaders:
                raise error.ProgrammingError(
                    b'illegal header on 304 response: %s'
                    % b', '.join(sorted(badheaders))
                )

            if self._bodygen is not None or self._bodywillwrite:
                raise error.ProgrammingError(
                    b"must use setbodybytes('') with 304 responses"
                )

        # Various HTTP clients (notably httplib) won't read the HTTP response
        # until the HTTP request has been sent in full. If servers (us) send a
        # response before the HTTP request has been fully sent, the connection
        # may deadlock because neither end is reading.
        #
        # We work around this by "draining" the request data before
        # sending any response in some conditions.
        drain = False
        close = False

        # If the client sent Expect: 100-continue, we assume it is smart enough
        # to deal with the server sending a response before reading the request.
        # (httplib doesn't do this.)
        if self._req.headers.get(b'Expect', b'').lower() == b'100-continue':
            pass
        # Only tend to request methods that have bodies. Strictly speaking,
        # we should sniff for a body. But this is fine for our existing
        # WSGI applications.
        elif self._req.method not in (b'POST', b'PUT'):
            pass
        else:
            # If we don't know how much data to read, there's no guarantee
            # that we can drain the request responsibly. The WSGI
            # specification only says that servers *should* ensure the
            # input stream doesn't overrun the actual request. So there's
            # no guarantee that reading until EOF won't corrupt the stream
            # state.
            if not isinstance(self._req.bodyfh, util.cappedreader):
                close = True
            else:
                # We /could/ only drain certain HTTP response codes. But 200 and
                # non-200 wire protocol responses both require draining. Since
                # we have a capped reader in place for all situations where we
                # drain, it is safe to read from that stream. We'll either do
                # a drain or no-op if we're already at EOF.
                drain = True

        if close:
            self.headers[b'Connection'] = b'Close'

        if drain:
            assert isinstance(self._req.bodyfh, util.cappedreader)
            while True:
                chunk = self._req.bodyfh.read(32768)
                if not chunk:
                    break

        strheaders = [
            (pycompat.strurl(k), pycompat.strurl(v))
            for k, v in self.headers.items()
        ]
        write = self._startresponse(pycompat.sysstr(self.status), strheaders)

        # Dispatch on which body was *defined* (``is not None``) rather than
        # on truthiness, so that an empty-bytes body - the way "no body" is
        # signalled - is recognised as valid and the final branch is only
        # reached for a genuinely unknown state.
        if self._bodybytes is not None:
            if self._bodybytes:
                yield self._bodybytes
        elif self._bodygen is not None:
            for chunk in self._bodygen:
                # PEP-3333 says that output must be bytes. And some WSGI
                # implementations enforce this. We cast bytes-like types here
                # for convenience.
                if isinstance(chunk, bytearray):
                    chunk = bytes(chunk)
                yield chunk
        elif self._bodywillwrite:
            self._bodywritefn = write
        elif self._req.method == b'HEAD':
            pass
        else:
            raise error.ProgrammingError(b'do not know how to send body')

    def getbodyfile(self):
        """Obtain a file object like object representing the response body.

        For this to work, you must call ``setbodywillwrite()`` and then
        ``sendresponse()`` first. ``sendresponse()`` is a generator and the
        function won't run to completion unless the generator is advanced. The
        generator yields no items. The easiest way to consume it is with
        ``list(res.sendresponse())``, which should resolve to an empty list -
        ``[]``.

        Raises ``error.ProgrammingError`` if either prerequisite call has not
        been made.
        """
        if not self._bodywillwrite:
            raise error.ProgrammingError(b'must call setbodywillwrite() first')

        if not self._started:
            raise error.ProgrammingError(
                b'must call sendresponse() first; did '
                b'you remember to consume it since it '
                b'is a generator?'
            )

        assert self._bodywritefn
        return offsettrackingwriter(self._bodywritefn)
Augie Fackler
|
r43346 | |||
Dirkjan Ochtman
|
def wsgiapplication(app_maker):
    """Wrap an application factory as a WSGI callable.

    Kept for compatibility with old CGI scripts. A plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application.

    ``app_maker`` is invoked once, right away, with no arguments. The
    returned function forwards ``(env, respond)`` to the object it produced
    and returns that object's result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        result = wsgiapp(env, respond)
        return result

    return run_wsgi