##// END OF EJS Templates
hgweb: document continuereader...
hgweb: document continuereader Differential Revision: https://phab.mercurial-scm.org/D2767

File last commit:

r36868:e3f809e0 default
r36869:70666171 default
Show More
request.py
312 lines | 11.7 KiB | text/x-python | PythonLexer
Eric Hopper
Fixing up comment headers for split up code.
r2391 # hgweb/request.py - An http request from either CGI or the standalone server.
Eric Hopper
Splitting up hgweb so it's easier to change.
r2355 #
# Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
Vadim Gelfer
update copyrights.
r2859 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
Eric Hopper
Splitting up hgweb so it's easier to change.
r2355 #
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Eric Hopper
Splitting up hgweb so it's easier to change.
r2355
Yuya Nishihara
hgweb: use absolute_import
r27046 from __future__ import absolute_import
import cgi
import errno
import socket
Gregory Szorc
hgweb: parse and store HTTP request headers...
r36832 import wsgiref.headers as wsgiheaders
Gregory Szorc
hgweb: parse WSGI request into a data structure...
r36824 #import wsgiref.validate
Yuya Nishihara
hgweb: use absolute_import
r27046
from .common import (
ErrorResponse,
HTTP_NOT_MODIFIED,
statusmessage,
)
Gregory Szorc
hgweb: parse WSGI request into a data structure...
r36824 from ..thirdparty import (
attr,
)
Yuya Nishihara
hgweb: use absolute_import
r27046 from .. import (
Augie Fackler
request: coerce content-type to native str...
r34515 pycompat,
Yuya Nishihara
hgweb: use absolute_import
r27046 util,
)
Eric Hopper
Splitting up hgweb so it's easier to change.
r2355
Dirkjan Ochtman
hgweb: move shortcut expansion to request instantiation
# Maps a short query-string key to the (name, value) pairs it expands to.
# A value of None means "reuse whatever the request supplied under the
# shortcut key"; a list is a fixed replacement value.
shortcuts = {
    'cl': [('cmd', ['changelog']), ('rev', None)],
    'sl': [('cmd', ['shortlog']), ('rev', None)],
    'cs': [('cmd', ['changeset']), ('node', None)],
    'f': [('cmd', ['file']), ('filenode', None)],
    'fl': [('cmd', ['filelog']), ('filenode', None)],
    'fd': [('cmd', ['filediff']), ('node', None)],
    'fa': [('cmd', ['annotate']), ('filenode', None)],
    'mf': [('cmd', ['manifest']), ('manifest', None)],
    'ca': [('cmd', ['archive']), ('node', None)],
    'tags': [('cmd', ['tags'])],
    'tip': [('cmd', ['changeset']), ('node', ['tip'])],
    'static': [('cmd', ['static']), ('file', None)]
}
Nicolas Dumazet
hgweb: request: strip() form values...
def normalize(form):
    """Expand shortcut keys in ``form`` and return a cleaned-up copy.

    ``form`` maps field names to lists of values. Every key listed in the
    module-level ``shortcuts`` table is replaced, in place, by the fields
    it stands for: a ``None`` entry in the table receives the values that
    were supplied under the shortcut key, any other entry is used as-is.

    Returns a new dict in which every key and every value has been passed
    through ``pycompat.bytesurl()``, with surrounding whitespace stripped
    from the values. Note that ``form`` itself is mutated by the shortcut
    expansion.
    """
    # first expand the shortcuts
    for k in shortcuts:
        if k in form:
            for name, value in shortcuts[k]:
                if value is None:
                    value = form[k]
                form[name] = value
            del form[k]
    # And strip the values
    bytesform = {}
    # .items() rather than .iteritems(): identical iteration, and it does
    # not rely on a Python 2-only dict method.
    for k, v in form.items():
        bytesform[pycompat.bytesurl(k)] = [
            pycompat.bytesurl(i.strip()) for i in v]
    return bytesform
Dirkjan Ochtman
hgweb: move shortcut expansion to request instantiation
r6774
Gregory Szorc
hgweb: parse WSGI request into a data structure...
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request / static HTTP request parameters.

    Instances are declared frozen. The attribute declaration order below
    is significant: it defines the order of the generated constructor
    arguments.
    """

    # Request method.
    method = attr.ib()
    # Full URL for this request.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # WSGI application path.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch.
    dispatchpath = attr.ib()
    # Whether there is a path component to this request. This can be true
    # when ``dispatchpath`` is empty due to REPO_NAME muckery.
    havepathinfo = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # List of 2-tuples of query string arguments.
    querystringlist = attr.ib()
    # Dict of query string arguments. Values are lists with at least 1 item.
    querystringdict = attr.ib()
    # wsgiref.headers.Headers instance. Operates like a dict with case
    # insensitive keys.
    headers = attr.ib()
Gregory Szorc
hgweb: parse WSGI request into a data structure...
r36824
def parserequestfromenv(env):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    ``env`` is the WSGI environ mapping. It is not modified; on Python 3 a
    converted copy is used internally.

    Returns a ``parsedrequest`` instance.

    Raises ``KeyError`` if ``wsgi.url_scheme``, ``SERVER_NAME``,
    ``REQUEST_METHOD`` or (when a port has to be rendered) ``SERVER_PORT``
    is missing from ``env``.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.items()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'
    advertisedfullurl = fullurl

    def addport(s):
        # Only render the port when it is not the default for the scheme.
        if env['wsgi.url_scheme'] == 'https':
            if env['SERVER_PORT'] != '443':
                s += ':' + env['SERVER_PORT']
        else:
            if env['SERVER_PORT'] != '80':
                s += ':' + env['SERVER_PORT']

        return s

    if env.get('HTTP_HOST'):
        # The Host header is used verbatim; no port is appended to it.
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # When dispatching requests, we look at the URL components (PATH_INFO
    # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
    # has the concept of "virtual" repositories. This is defined via REPO_NAME.
    # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
    # root. We also exclude its path components from PATH_INFO when resolving
    # the dispatch path.

    # SCRIPT_NAME may legitimately be absent when it would be empty, so read
    # it the same tolerant way the URL reconstruction above does.
    apppath = env.get('SCRIPT_NAME', '')

    if env.get('REPO_NAME'):
        if not apppath.endswith('/'):
            apppath += '/'

        apppath += env.get('REPO_NAME')

    if 'PATH_INFO' in env:
        dispatchparts = env['PATH_INFO'].strip('/').split('/')

        # Strip out repo parts.
        repoparts = env.get('REPO_NAME', '').split('/')
        if dispatchparts[:len(repoparts)] == repoparts:
            dispatchparts = dispatchparts[len(repoparts):]
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    querystring = env.get('QUERY_STRING', '')

    # We store as a list so we have ordering information. We also store as
    # a dict to facilitate fast lookup.
    querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)

    querystringdict = {}
    for k, v in querystringlist:
        if k in querystringdict:
            querystringdict[k].append(v)
        else:
            querystringdict[k] = [v]

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.items():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         havepathinfo='PATH_INFO' in env,
                         querystring=querystring,
                         querystringlist=querystringlist,
                         querystringdict=querystringdict,
                         headers=headers)
Gregory Szorc
hgweb: parse WSGI request into a data structure...
r36824
Dirkjan Ochtman
Less indirection in the WSGI web interface. This simplifies some code, and makes it more compliant with WSGI.
class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """

    def __init__(self, wsgienv, start_response):
        """Capture the WSGI environ and ``start_response`` callable.

        Raises ``RuntimeError`` unless ``wsgi.version`` is a 1.x version.
        Form data is parsed from ``wsgi.input`` immediately and stored,
        normalized, in ``self.form``.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)
        self.inp = wsgienv[r'wsgi.input']
        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.form = normalize(cgi.parse(self.inp,
                                        self.env,
                                        keep_blank_values=1))
        self._start_response = start_response
        # Set by respond() to the write callable returned by start_response.
        self.server_write = None
        # Response headers accumulated until respond() sends them.
        self.headers = []

    def drain(self):
        '''need to read all data from request, httplib is half-duplex

        Reads and discards up to CONTENT_LENGTH bytes (0 when the variable
        is absent or empty) from the request input stream.
        '''
        length = int(self.env.get('CONTENT_LENGTH') or 0)
        for s in util.filechunkiter(self.inp, limit=length):
            pass

    def respond(self, status, type, filename=None, body=None):
        """Send the status line and headers (once), then optionally a body.

        ``status`` may be an ``ErrorResponse``, an integer status code, or
        an already formatted status value. ``type`` is the Content-Type
        value. ``filename``, when given, adds a Content-Disposition header
        using only its last path component. ``body``, when given, adds a
        Content-Length header and is written after the headers.

        The headers are only sent on the first call; afterwards
        ``self._start_response`` is ``None`` and that part is skipped.

        Raises ``TypeError`` if a previously queued header value is not a
        native ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                # Keep the basename only and escape backslashes and quotes
                # so the value is safe inside the quoted-string below.
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                if status.code == HTTP_NOT_MODIFIED:
                    # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
                    # it MUST NOT include any headers other than these and no
                    # body
                    self.headers = [(k, v) for (k, v) in self.headers if
                                    k in ('Date', 'ETag', 'Expires',
                                          'Cache-Control', 'Vary')]
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            self._start_response = None
            self.headers = []
        # NOTE(review): placed at method level (runs even when the headers
        # were already sent) -- confirm against the upstream file, since the
        # indentation of this statement was reconstructed.
        if body is not None:
            self.write(body)
            self.server_write = None

    def write(self, thing):
        """Pass a non-empty chunk to the server's write callable.

        Empty chunks are skipped. A socket error whose errno is ECONNRESET
        is swallowed; any other socket error propagates.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Use the errno attribute: exception objects cannot be
                # subscripted on Python 3.
                if inst.errno != errno.ECONNRESET:
                    raise

    def flush(self):
        """No-op; present so the object can be used like a file."""
        return None
Dirkjan Ochtman
Less indirection in the WSGI web interface. This simplifies some code, and makes it more compliant with WSGI.
def wsgiapplication(app_maker):
    '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
    can and should now be used as a WSGI application.

    ``app_maker`` is called once, immediately, with no arguments; the
    object it returns is the application. The returned function takes
    ``(env, respond)``, forwards both to that application and returns its
    result.
    '''
    application = app_maker()

    def run_wsgi(env, respond):
        return application(env, respond)

    return run_wsgi