request.py
312 lines
| 11.7 KiB
| text/x-python
|
PythonLexer
Eric Hopper
|
r2391 | # hgweb/request.py - An http request from either CGI or the standalone server. | ||
Eric Hopper
|
r2355 | # | ||
# Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net> | ||||
Vadim Gelfer
|
r2859 | # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> | ||
Eric Hopper
|
r2355 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Eric Hopper
|
r2355 | |||
Yuya Nishihara
|
r27046 | from __future__ import absolute_import | ||
import cgi | ||||
import errno | ||||
import socket | ||||
Gregory Szorc
|
r36832 | import wsgiref.headers as wsgiheaders | ||
Gregory Szorc
|
r36824 | #import wsgiref.validate | ||
Yuya Nishihara
|
r27046 | |||
from .common import ( | ||||
ErrorResponse, | ||||
HTTP_NOT_MODIFIED, | ||||
statusmessage, | ||||
) | ||||
Gregory Szorc
|
r36824 | from ..thirdparty import ( | ||
attr, | ||||
) | ||||
Yuya Nishihara
|
r27046 | from .. import ( | ||
Augie Fackler
|
r34515 | pycompat, | ||
Yuya Nishihara
|
r27046 | util, | ||
) | ||||
Eric Hopper
|
r2355 | |||
Dirkjan Ochtman
|
# Maps a short query-string key to the (name, value) pairs it expands to.
# A value of None means "reuse the value that was supplied for the shortcut
# key itself" (see normalize()).
shortcuts = {
    'cl': [('cmd', ['changelog']), ('rev', None)],
    'sl': [('cmd', ['shortlog']), ('rev', None)],
    'cs': [('cmd', ['changeset']), ('node', None)],
    'f': [('cmd', ['file']), ('filenode', None)],
    'fl': [('cmd', ['filelog']), ('filenode', None)],
    'fd': [('cmd', ['filediff']), ('node', None)],
    'fa': [('cmd', ['annotate']), ('filenode', None)],
    'mf': [('cmd', ['manifest']), ('manifest', None)],
    'ca': [('cmd', ['archive']), ('node', None)],
    'tags': [('cmd', ['tags'])],
    'tip': [('cmd', ['changeset']), ('node', ['tip'])],
    'static': [('cmd', ['static']), ('file', None)]
}
Nicolas Dumazet
|
def normalize(form):
    """Expand shortcut keys in ``form`` and return a cleaned-up copy.

    ``form`` is a dict mapping parameter names to lists of values. It is
    modified in place: every key that appears in ``shortcuts`` is replaced
    by the (name, value) pairs it stands for. The returned dict is a new
    object whose keys and whitespace-stripped values have each been passed
    through ``pycompat.bytesurl``.
    """
    # first expand the shortcuts
    for k in shortcuts:
        if k in form:
            for name, value in shortcuts[k]:
                # None means "inherit whatever was passed for the shortcut".
                if value is None:
                    value = form[k]
                form[name] = value
            del form[k]
    # And strip the values
    bytesform = {}
    for k, v in form.iteritems():
        bytesform[pycompat.bytesurl(k)] = [
            pycompat.bytesurl(i.strip()) for i in v]
    return bytesform
Dirkjan Ochtman
|
r6774 | |||
Gregory Szorc
|
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request / static HTTP request parameters."""

    # Request method.
    method = attr.ib()
    # Full URL for this request.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # WSGI application path.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch.
    dispatchpath = attr.ib()
    # Whether there is a path component to this request. This can be true
    # when ``dispatchpath`` is empty due to REPO_NAME muckery.
    havepathinfo = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # List of 2-tuples of query string arguments.
    querystringlist = attr.ib()
    # Dict of query string arguments. Values are lists with at least 1 item.
    querystringdict = attr.ib()
    # wsgiref.headers.Headers instance. Operates like a dict with case
    # insensitive keys.
    headers = attr.ib()
Gregory Szorc
|
r36824 | |||
def parserequestfromenv(env):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    ``env`` is the WSGI environment dict. Returns a ``parsedrequest``.

    Raises ``KeyError`` if ``wsgi.url_scheme``, ``SERVER_NAME``,
    ``SERVER_PORT`` or ``REQUEST_METHOD`` is missing. ``SCRIPT_NAME``,
    ``PATH_INFO`` and ``QUERY_STRING`` are optional and treated as empty
    when absent.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.iteritems()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.iteritems()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'
    advertisedfullurl = fullurl

    def addport(s):
        # Only append the port when it is not the default for the scheme.
        if env['wsgi.url_scheme'] == 'https':
            if env['SERVER_PORT'] != '443':
                s += ':' + env['SERVER_PORT']
        else:
            if env['SERVER_PORT'] != '80':
                s += ':' + env['SERVER_PORT']

        return s

    # HTTP_HOST is used verbatim when present; the port is only appended
    # when falling back to SERVER_NAME.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # When dispatching requests, we look at the URL components (PATH_INFO
    # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
    # has the concept of "virtual" repositories. This is defined via REPO_NAME.
    # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
    # root. We also exclude its path components from PATH_INFO when resolving
    # the dispatch path.

    # SCRIPT_NAME may be omitted by the server when it would be empty, so
    # read it with a default, exactly as the URL reconstruction above does.
    apppath = env.get('SCRIPT_NAME', '')

    if env.get('REPO_NAME'):
        if not apppath.endswith('/'):
            apppath += '/'

        apppath += env.get('REPO_NAME')

    if 'PATH_INFO' in env:
        dispatchparts = env['PATH_INFO'].strip('/').split('/')

        # Strip out repo parts.
        repoparts = env.get('REPO_NAME', '').split('/')
        if dispatchparts[:len(repoparts)] == repoparts:
            dispatchparts = dispatchparts[len(repoparts):]
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    querystring = env.get('QUERY_STRING', '')

    # We store as a list so we have ordering information. We also store as
    # a dict to facilitate fast lookup.
    querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)

    querystringdict = {}
    for k, v in querystringlist:
        if k in querystringdict:
            querystringdict[k].append(v)
        else:
            querystringdict[k] = [v]

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.iteritems():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         havepathinfo='PATH_INFO' in env,
                         querystring=querystring,
                         querystringlist=querystringlist,
                         querystringdict=querystringdict,
                         headers=headers)
Gregory Szorc
|
r36824 | |||
Dirkjan Ochtman
|
class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """

    def __init__(self, wsgienv, start_response):
        """Capture the WSGI environment and parse the request form.

        ``wsgienv`` is the WSGI environment dict and ``start_response`` the
        WSGI callable used to begin the response.

        Raises ``RuntimeError`` unless ``wsgi.version`` is 1.x.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)
        self.inp = wsgienv[r'wsgi.input']
        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.form = normalize(cgi.parse(self.inp,
                                        self.env,
                                        keep_blank_values=1))
        self._start_response = start_response
        # Set by respond() to the write callable returned by start_response.
        self.server_write = None
        # Pending (name, value) response headers, flushed by respond().
        self.headers = []

    def drain(self):
        '''need to read all data from request, httplib is half-duplex'''
        # A missing or empty CONTENT_LENGTH is treated as zero bytes.
        length = int(self.env.get('CONTENT_LENGTH') or 0)
        for s in util.filechunkiter(self.inp, limit=length):
            pass

    def respond(self, status, type, filename=None, body=None):
        """Start the HTTP response and optionally send a complete body.

        ``status`` may be an ``ErrorResponse``, an integer status code, or
        any other value, which is passed through as the status line.
        ``type`` is the Content-Type value. ``filename``, if given, adds a
        Content-Disposition header. ``body``, if not None, adds a
        Content-Length header and is written out immediately.

        Headers are only sent on the first call; once ``start_response``
        has been invoked, later calls only write ``body``.

        Raises ``TypeError`` if a pending header value is not a ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                # Keep only the last path component and escape backslashes
                # and double quotes for the quoted header value.
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                if status.code == HTTP_NOT_MODIFIED:
                    # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
                    # it MUST NOT include any headers other than these and no
                    # body
                    self.headers = [(k, v) for (k, v) in self.headers if
                                    k in ('Date', 'ETag', 'Expires',
                                          'Cache-Control', 'Vary')]
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            # Ensure the headers are never sent a second time.
            self._start_response = None
            self.headers = []
        if body is not None:
            self.write(body)
            # A body passed here is the whole response; disallow more writes.
            self.server_write = None

    def write(self, thing):
        """Send ``thing`` through the server's write callable.

        Empty/falsy data is skipped. A connection reset raised by the
        write callable is ignored; any other ``socket.error`` propagates.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Exceptions are not subscriptable on Python 3, so compare
                # the errno attribute rather than indexing the exception.
                if inst.errno != errno.ECONNRESET:
                    raise

    def flush(self):
        """Do nothing; always returns None."""
        return None
Dirkjan Ochtman
|
def wsgiapplication(app_maker):
    '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
    can and should now be used as a WSGI application.

    ``app_maker`` is called once, with no arguments, to build the
    application. The returned callable forwards ``(env, respond)`` to that
    application and returns its result.
    '''
    application = app_maker()

    def run_wsgi(env, respond):
        return application(env, respond)

    return run_wsgi