request.py
713 lines
| 26.9 KiB
| text/x-python
|
PythonLexer
Eric Hopper
|
r2391 | # hgweb/request.py - An http request from either CGI or the standalone server. | ||
Eric Hopper
|
r2355 | # | ||
# Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net> | ||||
Vadim Gelfer
|
r2859 | # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> | ||
Eric Hopper
|
r2355 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Eric Hopper
|
r2355 | |||
Yuya Nishihara
|
r27046 | from __future__ import absolute_import | ||
import errno | ||||
import socket | ||||
Gregory Szorc
|
r36832 | import wsgiref.headers as wsgiheaders | ||
Gregory Szorc
|
r36824 | #import wsgiref.validate | ||
Yuya Nishihara
|
r27046 | |||
from .common import ( | ||||
ErrorResponse, | ||||
statusmessage, | ||||
) | ||||
Gregory Szorc
|
r36824 | from ..thirdparty import ( | ||
attr, | ||||
) | ||||
Yuya Nishihara
|
r27046 | from .. import ( | ||
Gregory Szorc
|
r36877 | error, | ||
Augie Fackler
|
r34515 | pycompat, | ||
Yuya Nishihara
|
r27046 | util, | ||
) | ||||
Eric Hopper
|
r2355 | |||
Gregory Szorc
|
class multidict(object):
    """Ordered mapping-like container allowing several values per key.

    Parsed request parameters are kept in instances of this class. The
    design follows the class of the same name in WebOb.
    """

    def __init__(self):
        # A flat list of (key, value) pairs in insertion order. Lookups are
        # linear scans; that is acceptable because request parameters are
        # few. A dict index could be layered on top if that ever changes.
        self._items = []

    def __getitem__(self, key):
        """Return the most recently stored value for ``key``.

        Raises KeyError if the key has no value.
        """
        idx = len(self._items) - 1
        while idx >= 0:
            name, value = self._items[idx]
            if name == key:
                return value
            idx -= 1
        raise KeyError(key)

    def __setitem__(self, key, value):
        """Discard every existing value for ``key`` and store ``value``."""
        if key in self:
            del self[key]
        self._items.append((key, value))

    def __delitem__(self, key):
        """Remove every value stored for ``key``.

        Raises KeyError if the key has no value.
        """
        kept = [pair for pair in self._items if pair[0] != key]
        if len(kept) == len(self._items):
            # Nothing was filtered out, so the key was never present.
            raise KeyError(key)
        self._items[:] = kept

    def __contains__(self, key):
        for name, _value in self._items:
            if name == key:
                return True
        return False

    def __len__(self):
        # Counts stored pairs, not distinct keys.
        return len(self._items)

    def get(self, key, default=None):
        """Return the last value for ``key`` or ``default`` if absent."""
        try:
            value = self[key]
        except KeyError:
            value = default
        return value

    def add(self, key, value):
        """Append a value for ``key``, keeping any values already stored."""
        self._items.append((key, value))

    def getall(self, key):
        """Return a list of every value for ``key`` in insertion order."""
        found = []
        for name, value in self._items:
            if name == key:
                found.append(value)
        return found

    def getone(self, key):
        """Return the only value stored for ``key``.

        Raises KeyError if the key is missing or has more than one value.
        """
        found = self.getall(key)
        count = len(found)
        if count == 1:
            return found[0]
        if count == 0:
            raise KeyError(key)
        raise KeyError('multiple values for %r' % key)

    def asdictoflists(self):
        """Return a plain dict mapping each key to the list of its values."""
        grouped = {}
        for name, value in self._items:
            grouped.setdefault(name, []).append(value)
        return grouped
Gregory Szorc
|
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request.

    Contains both parsed parameters as well as a handle on the input stream.

    The class is declared with ``attr.s(frozen=True)``; every field below is
    a plain ``attr.ib()`` with no default, so all of them must be supplied
    when an instance is constructed.
    """

    # Request method.
    method = attr.ib()
    # Full URL for this request.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # Value of REMOTE_USER, if set, or None.
    remoteuser = attr.ib()
    # Value of REMOTE_HOST, if set, or None.
    remotehost = attr.ib()
    # Relative WSGI application path. If defined, will begin with a
    # ``/``.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch. Can be
    # ``None`` to signal no path component given to the request, an
    # empty string to signal a request to the application's root URL,
    # or a string not beginning with ``/`` containing the requested
    # path under the application.
    dispatchpath = attr.ib()
    # The name of the repository being accessed.
    reponame = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # multidict of query string parameters.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance. Operates like a dict with case
    # insensitive keys.
    headers = attr.ib()
    # Request body input stream.
    bodyfh = attr.ib()
Gregory Szorc
|
r36824 | |||
Gregory Szorc
|
def parserequestfromenv(env, bodyfh, reponame=None, altbaseurl=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    ``bodyfh`` is stored unchanged on the result as the request body stream.

    Returns a ``parsedrequest``. Raises ``error.ProgrammingError`` when
    ``reponame`` is given but ``PATH_INFO`` is missing, does not start with
    the repo name, or the repo name does not end at a path delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    #
    # ``items()`` is used rather than ``iteritems()`` because the latter does
    # not exist on Python 3 dicts, which is the only place this branch runs.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.items()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + '://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Append ``:port`` unless it is the default port for the URL scheme.
        if s.startswith('https://'):
            if port != '443':
                s += ':' + port
        else:
            if port != '80':
                s += ':' + port

        return s

    if env.get('HTTP_HOST'):
        # The Host header is used verbatim; no port is appended to it here.
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl, env['SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # Prefer an explicit port from the alternate URL, then the default
        # port of its scheme, and only then the server's own port.
        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == 'http':
            port = '80'
        elif altbaseurl.scheme == 'https':
            port = '443'
        else:
            port = env['SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env['SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])

    # Snapshot the URLs before any path component is appended.
    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if altbaseurl:
        path = altbaseurl.path or ''
        if path and not path.startswith('/'):
            path = '/' + path
        advertisedfullurl += util.urlreq.quote(path)
    else:
        advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))

    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if altbaseurl:
        apppath = altbaseurl.path or ''
        if apppath and not apppath.startswith('/'):
            apppath = '/' + apppath
    else:
        apppath = env.get('SCRIPT_NAME', '')

    if reponame:
        repoprefix = '/' + reponame.strip('/')

        if not env.get('PATH_INFO'):
            raise error.ProgrammingError('reponame requires PATH_INFO')

        if not env['PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                         'name: %s (%s)' % (env['PATH_INFO'],
                                                            reponame))

        dispatchpath = env['PATH_INFO'][len(repoprefix):]

        if dispatchpath and not dispatchpath.startswith('/'):
            raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                         'not end at path delimiter: %s (%s)' %
                                         (env['PATH_INFO'], reponame))

        apppath = apppath.rstrip('/') + repoprefix
        dispatchparts = dispatchpath.strip('/').split('/')
        dispatchpath = '/'.join(dispatchparts)

    elif 'PATH_INFO' in env:
        if env['PATH_INFO'].strip('/'):
            dispatchparts = env['PATH_INFO'].strip('/').split('/')
            dispatchpath = '/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = ''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get('QUERY_STRING', '')

    # Parameters are stored in a multidict, which keeps ordering information
    # and allows a key to appear more than once.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.items():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    # TODO do this once we remove wsgirequest.inp, otherwise we could have
    # multiple readers from the underlying input stream.
    #bodyfh = env['wsgi.input']
    #if 'Content-Length' in headers:
    #    bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=env['wsgi.url_scheme'],
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh)
Gregory Szorc
|
r36824 | |||
Gregory Szorc
|
class offsettrackingwriter(object):
    """Append-only file-like wrapper that counts what has been written.

    An instance wraps a single callable. Every ``write()`` forwards its
    data to that callable and adds to a running total, which ``tell()``
    reports.

    This exists to wrap the ``write()`` function handed back by a WSGI
    ``start_response()`` call: that function is a bare callable rather than
    a file object, so it offers no other file object methods.
    """

    def __init__(self, writefn):
        self._write = writefn
        self._offset = 0

    def write(self, s):
        written = self._write(s)
        # Not every underlying writer reports a byte count; when it returns
        # None, assume the whole buffer was consumed.
        self._offset += len(s) if written is None else written

    def flush(self):
        # Nothing is buffered here, so there is nothing to flush.
        return None

    def tell(self):
        return self._offset
Gregory Szorc
|
class wsgiresponse(object):
    """Represents a response to a WSGI request.

    A response consists of a status line, headers, and a body.

    Consumers must populate the ``status`` and ``headers`` fields and
    make a call to a ``setbody*()`` method before the response can be
    issued.

    When it is time to start sending the response over the wire,
    ``sendresponse()`` is called. It handles emitting the header portion
    of the response message. It then yields chunks of body data to be
    written to the peer. Typically, the WSGI application itself calls
    and returns the value from ``sendresponse()``.
    """

    def __init__(self, req, startresponse):
        """Create an empty response tied to a specific request.

        ``req`` is a ``parsedrequest``. ``startresponse`` is the
        ``start_response`` function passed to the WSGI application.
        """
        self._req = req
        self._startresponse = startresponse

        self.status = None
        self.headers = wsgiheaders.Headers([])

        # Exactly one of the following three is set by a ``setbody*()`` call.
        self._bodybytes = None
        self._bodygen = None
        self._bodywillwrite = False

        self._started = False
        # The ``write()`` callable returned by ``start_response``; only
        # recorded when the body is produced via ``setbodywillwrite()``.
        self._bodywritefn = None

    def _verifybody(self):
        """Raise ProgrammingError if a body has already been defined."""
        if (self._bodybytes is not None or self._bodygen is not None
            or self._bodywillwrite):
            raise error.ProgrammingError('cannot define body multiple times')

    def setbodybytes(self, b):
        """Define the response body as static bytes.

        The empty string signals that there is no response body.

        Also sets the ``Content-Length`` header to ``len(b)``.
        """
        self._verifybody()
        self._bodybytes = b
        self.headers['Content-Length'] = '%d' % len(b)

    def setbodygen(self, gen):
        """Define the response body as a generator of bytes."""
        self._verifybody()
        self._bodygen = gen

    def setbodywillwrite(self):
        """Signal an intent to use write() to emit the response body.

        **This is the least preferred way to send a body.**

        It is preferred for WSGI applications to emit a generator of chunks
        constituting the response body. However, some consumers can't emit
        data this way. So, WSGI provides a way to obtain a ``write(data)``
        function that can be used to synchronously perform an unbuffered
        write.

        Calling this function signals an intent to produce the body in this
        manner.
        """
        self._verifybody()
        self._bodywillwrite = True

    def sendresponse(self):
        """Send the generated response to the client.

        Before this is called, ``status`` must be set and one of
        ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
        be called.

        Calling this method multiple times is not allowed.

        This is a generator: nothing happens until it is iterated. It yields
        the body chunks (none at all for an empty ``setbodybytes('')`` body
        or for a ``setbodywillwrite()`` body).

        Raises ``error.ProgrammingError`` if called twice, if the status or
        body is undefined, or if a 304 response carries a disallowed header
        or a non-static body.
        """
        if self._started:
            raise error.ProgrammingError('sendresponse() called multiple times')

        self._started = True

        if not self.status:
            raise error.ProgrammingError('status line not defined')

        if (self._bodybytes is None and self._bodygen is None
            and not self._bodywillwrite):
            raise error.ProgrammingError('response body not defined')

        # RFC 7232 Section 4.1 states that a 304 MUST generate one of
        # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
        # and SHOULD NOT generate other headers unless they could be used
        # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
        # states that no response body can be issued. Content-Length can
        # be sent. But if it is present, it should be the size of the response
        # that wasn't transferred.
        if self.status.startswith('304 '):
            # setbodybytes('') will set C-L to 0. This doesn't conform with the
            # spec. So remove it.
            if self.headers.get('Content-Length') == '0':
                del self.headers['Content-Length']

            # Strictly speaking, this is too strict. But until it causes
            # problems, let's be strict.
            badheaders = {k for k in self.headers.keys()
                          if k.lower() not in ('date', 'etag', 'expires',
                                               'cache-control',
                                               'content-location',
                                               'vary')}
            if badheaders:
                raise error.ProgrammingError(
                    'illegal header on 304 response: %s' %
                    ', '.join(sorted(badheaders)))

            if self._bodygen is not None or self._bodywillwrite:
                raise error.ProgrammingError("must use setbodybytes('') with "
                                             "304 responses")

        # Various HTTP clients (notably httplib) won't read the HTTP response
        # until the HTTP request has been sent in full. If servers (us) send a
        # response before the HTTP request has been fully sent, the connection
        # may deadlock because neither end is reading.
        #
        # We work around this by "draining" the request data before
        # sending any response in some conditions.
        drain = False
        close = False

        # If the client sent Expect: 100-continue, we assume it is smart enough
        # to deal with the server sending a response before reading the request.
        # (httplib doesn't do this.)
        if self._req.headers.get('Expect', '').lower() == '100-continue':
            pass
        # Only tend to request methods that have bodies. Strictly speaking,
        # we should sniff for a body. But this is fine for our existing
        # WSGI applications.
        elif self._req.method not in ('POST', 'PUT'):
            pass
        else:
            # If we don't know how much data to read, there's no guarantee
            # that we can drain the request responsibly. The WSGI
            # specification only says that servers *should* ensure the
            # input stream doesn't overrun the actual request. So there's
            # no guarantee that reading until EOF won't corrupt the stream
            # state.
            if not isinstance(self._req.bodyfh, util.cappedreader):
                close = True
            else:
                # We /could/ only drain certain HTTP response codes. But 200 and
                # non-200 wire protocol responses both require draining. Since
                # we have a capped reader in place for all situations where we
                # drain, it is safe to read from that stream. We'll either do
                # a drain or no-op if we're already at EOF.
                drain = True

        if close:
            self.headers['Connection'] = 'Close'

        if drain:
            assert isinstance(self._req.bodyfh, util.cappedreader)
            while True:
                chunk = self._req.bodyfh.read(32768)
                if not chunk:
                    break

        write = self._startresponse(pycompat.sysstr(self.status),
                                    self.headers.items())

        # Dispatch on which body kind was *defined* (``is not None``) rather
        # than on truthiness, so that an empty ``setbodybytes('')`` body is
        # recognized as a valid "no body" response instead of falling through
        # to the error branch.
        if self._bodybytes is not None:
            if self._bodybytes:
                yield self._bodybytes
        elif self._bodygen is not None:
            for chunk in self._bodygen:
                yield chunk
        elif self._bodywillwrite:
            self._bodywritefn = write
        else:
            raise error.ProgrammingError('do not know how to send body')

    def getbodyfile(self):
        """Obtain a file object like object representing the response body.

        For this to work, you must call ``setbodywillwrite()`` and then
        ``sendresponse()`` first. ``sendresponse()`` is a generator and the
        function won't run to completion unless the generator is advanced. The
        generator yields no items. The easiest way to consume it is with
        ``list(res.sendresponse())``, which should resolve to an empty list -
        ``[]``.

        Returns an ``offsettrackingwriter`` wrapping the WSGI ``write()``
        callable. Raises ``error.ProgrammingError`` if the prerequisites
        above were not met.
        """
        if not self._bodywillwrite:
            raise error.ProgrammingError('must call setbodywillwrite() first')

        if not self._started:
            raise error.ProgrammingError('must call sendresponse() first; did '
                                         'you remember to consume it since it '
                                         'is a generator?')

        assert self._bodywritefn
        return offsettrackingwriter(self._bodywritefn)
Dirkjan Ochtman
|
class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """

    def __init__(self, wsgienv, start_response, altbaseurl=None):
        """Wrap a WSGI environ dict and ``start_response`` callable.

        ``altbaseurl`` is forwarded to ``parserequestfromenv()``.

        Raises RuntimeError if ``wsgi.version`` is not a 1.x version.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)

        inp = wsgienv[r'wsgi.input']

        # When the request length is known, cap reads of the input stream to
        # that many bytes. ``respond()`` relies on this wrapper to decide
        # whether it can safely drain the request body.
        if r'HTTP_CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
        elif r'CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))

        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.req = parserequestfromenv(wsgienv, inp, altbaseurl=altbaseurl)
        self.res = wsgiresponse(self.req, start_response)
        # Reset to None once the response has been started.
        self._start_response = start_response
        # The ``write()`` callable returned by ``start_response``.
        self.server_write = None
        # Pending (name, value) response headers.
        self.headers = []

    def respond(self, status, type, filename=None, body=None):
        """Start the HTTP response (once) and optionally write ``body``.

        ``status`` may be an ``ErrorResponse``, an int status code, or an
        already formatted status line. ``type`` is the Content-Type value.
        ``filename``, if given, produces a Content-Disposition header.
        ``body``, if not None, sets Content-Length and is written out.

        Headers are only emitted on the first call; later calls just write
        ``body``. Raises TypeError if a pending header value is not a
        ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                # Keep only the final path component and escape characters
                # that are special inside a quoted header value.
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            # Various HTTP clients (notably httplib) won't read the HTTP
            # response until the HTTP request has been sent in full. If servers
            # (us) send a response before the HTTP request has been fully sent,
            # the connection may deadlock because neither end is reading.
            #
            # We work around this by "draining" the request data before
            # sending any response in some conditions.
            drain = False
            close = False

            # If the client sent Expect: 100-continue, we assume it is smart
            # enough to deal with the server sending a response before reading
            # the request. (httplib doesn't do this.)
            if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
                pass
            # Only tend to request methods that have bodies. Strictly speaking,
            # we should sniff for a body. But this is fine for our existing
            # WSGI applications.
            elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
                pass
            else:
                # If we don't know how much data to read, there's no guarantee
                # that we can drain the request responsibly. The WSGI
                # specification only says that servers *should* ensure the
                # input stream doesn't overrun the actual request. So there's
                # no guarantee that reading until EOF won't corrupt the stream
                # state.
                if not isinstance(self.req.bodyfh, util.cappedreader):
                    close = True
                else:
                    # We /could/ only drain certain HTTP response codes. But 200
                    # and non-200 wire protocol responses both require draining.
                    # Since we have a capped reader in place for all situations
                    # where we drain, it is safe to read from that stream. We'll
                    # either do a drain or no-op if we're already at EOF.
                    drain = True

            if close:
                self.headers.append((r'Connection', r'Close'))

            if drain:
                assert isinstance(self.req.bodyfh, util.cappedreader)
                while True:
                    chunk = self.req.bodyfh.read(32768)
                    if not chunk:
                        break

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            self._start_response = None
            self.headers = []
        if body is not None:
            self.write(body)
            self.server_write = None

    def write(self, thing):
        """Write ``thing`` to the client; empty values are skipped.

        A connection reset by the peer is silently ignored; any other
        ``socket.error`` propagates.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Use the ``errno`` attribute rather than ``inst[0]``:
                # exception objects are not subscriptable on Python 3.
                if inst.errno != errno.ECONNRESET:
                    raise

    def flush(self):
        """No-op; present for file-object compatibility."""
        return None
Dirkjan Ochtman
|
def wsgiapplication(app_maker):
    '''Compatibility shim for old CGI scripts.

    Calls ``app_maker()`` once, immediately, and returns a WSGI callable
    that forwards every request to the application it produced. New code
    can and should use a plain hgweb() or hgwebdir() as the WSGI
    application instead.'''
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        # Pure pass-through to the application built above.
        return wsgiapp(env, respond)

    return run_wsgi