##// END OF EJS Templates
hgweb: cast bytearray to bytes...
Gregory Szorc -
r40612:6107d454 stable
parent child Browse files
Show More
@@ -1,576 +1,582 b''
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 #import wsgiref.validate
11 #import wsgiref.validate
12
12
13 from ..thirdparty import (
13 from ..thirdparty import (
14 attr,
14 attr,
15 )
15 )
16 from .. import (
16 from .. import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.

    Each key maps to a list of values kept in the order they were added.
    Plain item access (``obj[key]``) yields the most recently added value.
    """
    def __init__(self):
        # key -> list of values recorded for that key, oldest first.
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key is not defined.
        """
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key.

        Raises KeyError if the key is not defined.
        """
        del self._items[key]

    def __contains__(self, key):
        """Report whether the key is defined."""
        return key in self._items

    def __len__(self):
        """Return the number of distinct keys (not the number of values)."""
        return len(self._items)

    def get(self, key, default=None):
        """Return the last set value for a key, or ``default`` if undefined."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key.

        Returns an empty list if the key is not defined. For a defined key
        the internal list itself is returned, not a copy.
        """
        return self._items.get(key, [])

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            raise KeyError('multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Return a plain dict mapping each key to a copy of its value list.

        The returned lists are fresh copies, so mutating them does not
        affect this object.
        """
        # ``items()`` exists on both Python 2 and Python 3 dicts, whereas
        # ``iteritems()`` is Python 2 only.
        return {k: list(v) for k, v in self._items.items()}
78
78
@attr.s(frozen=True)
class parsedrequest(object):
    """Immutable record describing a parsed WSGI request.

    Holds the URL pieces, headers and query parameters extracted from the
    WSGI environment, plus a handle on the stream carrying the request body.
    """

    # HTTP request method.
    method = attr.ib()
    # Complete URL of the request.
    url = attr.ib()
    # URL stripped of all path components: only <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised counterparts of ``url`` and ``baseurl``. They are built
    # from SERVER_NAME rather than the HTTP Host header, which is likely
    # what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # Scheme of the URL, i.e. what precedes ``://`` (``http``, ``https``).
    urlscheme = attr.ib()
    # REMOTE_USER value when present, otherwise None.
    remoteuser = attr.ib()
    # REMOTE_HOST value when present, otherwise None.
    remotehost = attr.ib()
    # Path of the WSGI application, relative. Begins with ``/`` when
    # defined.
    apppath = attr.ib()
    # Path segments used for dispatching, as a list.
    dispatchparts = attr.ib()
    # Path component of the URL (query string excluded) used for dispatch.
    # ``None`` means the request carried no path component, the empty
    # string means the application's root URL was requested, and any other
    # value is the requested path below the application, without a
    # leading ``/``.
    dispatchpath = attr.ib()
    # Name of the repository this request targets.
    reponame = attr.ib()
    # Unparsed query string (everything after "?" in the URL).
    querystring = attr.ib()
    # Query string parameters, as a multidict.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance; behaves like a dict whose keys are
    # case insensitive.
    headers = attr.ib()
    # Input stream for the request body.
    bodyfh = attr.ib()
    # The WSGI environment dict, unmodified.
    rawenv = attr.ib()
126
126
def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo. If it is not given, the
    ``REPO_NAME`` key of ``env`` is consulted instead.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    ``bodyfh`` can be used to specify a file object to read the request body
    from. If not defined, ``wsgi.input`` from the environment dict is used
    and, when a ``Content-Length`` header is known, reads from it are capped
    to that many bytes.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` if a repo name is in effect and
    ``PATH_INFO`` is missing/empty, does not start with the repo name, or
    the repo name does not end at a path delimiter within ``PATH_INFO``.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    #
    # ``items()`` is used for iteration throughout because it is available
    # on both Python 2 and Python 3 dicts, unlike ``iteritems()``.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.items()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    # Some hosting solutions are emulating hgwebdir, and dispatching directly
    # to an hgweb instance using this environment variable. This was always
    # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
    if not reponame:
        reponame = env.get('REPO_NAME')

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + '://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        """Append ``:<port>`` to ``s`` unless it is the scheme's default."""
        if s.startswith('https://'):
            if port != '443':
                s += ':' + port
        else:
            if port != '80':
                s += ':' + port

        return s

    # The Host header, when present, is used verbatim; the port is only
    # appended when falling back to SERVER_NAME.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl, env['SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # Prefer an explicit port from the alternate base URL, then the
        # default port of its scheme, then the server's own port.
        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == 'http' and not altbaseurl.port:
            port = '80'
        elif altbaseurl.scheme == 'https' and not altbaseurl.port:
            port = '443'
        else:
            port = env['SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env['SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])

    # Snapshot the URLs before any path component is appended.
    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if altbaseurl:
        path = altbaseurl.path or ''
        if path and not path.startswith('/'):
            path = '/' + path
        advertisedfullurl += util.urlreq.quote(path)
    else:
        advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))

    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if altbaseurl:
        apppath = altbaseurl.path or ''
        if apppath and not apppath.startswith('/'):
            apppath = '/' + apppath
    else:
        apppath = env.get('SCRIPT_NAME', '')

    if reponame:
        repoprefix = '/' + reponame.strip('/')

        if not env.get('PATH_INFO'):
            raise error.ProgrammingError('reponame requires PATH_INFO')

        if not env['PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                         'name: %s (%s)' % (env['PATH_INFO'],
                                                            reponame))

        dispatchpath = env['PATH_INFO'][len(repoprefix):]

        if dispatchpath and not dispatchpath.startswith('/'):
            raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                         'not end at path delimiter: %s (%s)' %
                                         (env['PATH_INFO'], reponame))

        # The repo name becomes part of the application path; what is left
        # of PATH_INFO is what gets dispatched on.
        apppath = apppath.rstrip('/') + repoprefix
        dispatchparts = dispatchpath.strip('/').split('/')
        dispatchpath = '/'.join(dispatchparts)

    elif 'PATH_INFO' in env:
        if env['PATH_INFO'].strip('/'):
            dispatchparts = env['PATH_INFO'].strip('/').split('/')
            dispatchpath = '/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = ''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get('QUERY_STRING', '')

    # We store as a list so we have ordering information. We also store as
    # a dict to facilitate fast lookup.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.items():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    from . import wsgiheaders # avoid cycle
    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
        headers['Content-Type'] = env['CONTENT_TYPE']

    if bodyfh is None:
        bodyfh = env['wsgi.input']
        # Only the WSGI-provided stream is capped; a caller-supplied
        # ``bodyfh`` is used as given.
        if 'Content-Length' in headers:
            bodyfh = util.cappedreader(bodyfh,
                                       int(headers['Content-Length'] or '0'))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=env['wsgi.url_scheme'],
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh,
                         rawenv=env)
334
334
class offsettrackingwriter(object):
    """Append-only, file-like wrapper around a write callable.

    Every ``write()`` is forwarded to the callable supplied at construction
    time, and a running total of written data is kept so that ``tell()``
    can be answered.

    This exists to wrap the ``write()`` function handed back by a WSGI
    ``start_response()`` call. That function is a bare callable rather than
    a file object, so it offers none of the other file object methods.
    """
    def __init__(self, writefn):
        self._offset = 0
        self._write = writefn

    def write(self, s):
        """Forward ``s`` to the wrapped callable and advance the offset."""
        written = self._write(s)
        # Not every callable reports how much it wrote; when it doesn't,
        # count the whole payload.
        self._offset += len(s) if written is None else written

    def flush(self):
        """Do nothing; present only for file object compatibility."""
        return None

    def tell(self):
        """Return the total amount of data written so far."""
        return self._offset
365
365
366 class wsgiresponse(object):
366 class wsgiresponse(object):
367 """Represents a response to a WSGI request.
367 """Represents a response to a WSGI request.
368
368
369 A response consists of a status line, headers, and a body.
369 A response consists of a status line, headers, and a body.
370
370
371 Consumers must populate the ``status`` and ``headers`` fields and
371 Consumers must populate the ``status`` and ``headers`` fields and
372 make a call to a ``setbody*()`` method before the response can be
372 make a call to a ``setbody*()`` method before the response can be
373 issued.
373 issued.
374
374
375 When it is time to start sending the response over the wire,
375 When it is time to start sending the response over the wire,
376 ``sendresponse()`` is called. It handles emitting the header portion
376 ``sendresponse()`` is called. It handles emitting the header portion
377 of the response message. It then yields chunks of body data to be
377 of the response message. It then yields chunks of body data to be
378 written to the peer. Typically, the WSGI application itself calls
378 written to the peer. Typically, the WSGI application itself calls
379 and returns the value from ``sendresponse()``.
379 and returns the value from ``sendresponse()``.
380 """
380 """
381
381
382 def __init__(self, req, startresponse):
382 def __init__(self, req, startresponse):
383 """Create an empty response tied to a specific request.
383 """Create an empty response tied to a specific request.
384
384
385 ``req`` is a ``parsedrequest``. ``startresponse`` is the
385 ``req`` is a ``parsedrequest``. ``startresponse`` is the
386 ``start_response`` function passed to the WSGI application.
386 ``start_response`` function passed to the WSGI application.
387 """
387 """
388 self._req = req
388 self._req = req
389 self._startresponse = startresponse
389 self._startresponse = startresponse
390
390
391 self.status = None
391 self.status = None
392 from . import wsgiheaders # avoid cycle
392 from . import wsgiheaders # avoid cycle
393 self.headers = wsgiheaders.Headers([])
393 self.headers = wsgiheaders.Headers([])
394
394
395 self._bodybytes = None
395 self._bodybytes = None
396 self._bodygen = None
396 self._bodygen = None
397 self._bodywillwrite = False
397 self._bodywillwrite = False
398 self._started = False
398 self._started = False
399 self._bodywritefn = None
399 self._bodywritefn = None
400
400
401 def _verifybody(self):
401 def _verifybody(self):
402 if (self._bodybytes is not None or self._bodygen is not None
402 if (self._bodybytes is not None or self._bodygen is not None
403 or self._bodywillwrite):
403 or self._bodywillwrite):
404 raise error.ProgrammingError('cannot define body multiple times')
404 raise error.ProgrammingError('cannot define body multiple times')
405
405
406 def setbodybytes(self, b):
406 def setbodybytes(self, b):
407 """Define the response body as static bytes.
407 """Define the response body as static bytes.
408
408
409 The empty string signals that there is no response body.
409 The empty string signals that there is no response body.
410 """
410 """
411 self._verifybody()
411 self._verifybody()
412 self._bodybytes = b
412 self._bodybytes = b
413 self.headers['Content-Length'] = '%d' % len(b)
413 self.headers['Content-Length'] = '%d' % len(b)
414
414
415 def setbodygen(self, gen):
415 def setbodygen(self, gen):
416 """Define the response body as a generator of bytes."""
416 """Define the response body as a generator of bytes."""
417 self._verifybody()
417 self._verifybody()
418 self._bodygen = gen
418 self._bodygen = gen
419
419
420 def setbodywillwrite(self):
420 def setbodywillwrite(self):
421 """Signal an intent to use write() to emit the response body.
421 """Signal an intent to use write() to emit the response body.
422
422
423 **This is the least preferred way to send a body.**
423 **This is the least preferred way to send a body.**
424
424
425 It is preferred for WSGI applications to emit a generator of chunks
425 It is preferred for WSGI applications to emit a generator of chunks
426 constituting the response body. However, some consumers can't emit
426 constituting the response body. However, some consumers can't emit
427 data this way. So, WSGI provides a way to obtain a ``write(data)``
427 data this way. So, WSGI provides a way to obtain a ``write(data)``
428 function that can be used to synchronously perform an unbuffered
428 function that can be used to synchronously perform an unbuffered
429 write.
429 write.
430
430
431 Calling this function signals an intent to produce the body in this
431 Calling this function signals an intent to produce the body in this
432 manner.
432 manner.
433 """
433 """
434 self._verifybody()
434 self._verifybody()
435 self._bodywillwrite = True
435 self._bodywillwrite = True
436
436
437 def sendresponse(self):
def sendresponse(self):
    """Send the generated response to the client.

    Before this is called, ``status`` must be set and a response body
    must have been defined (via ``setbodybytes()``, ``setbodygen()``, or
    by declaring that the body will be written through a write function).

    This is a generator: nothing happens until it is advanced. It yields
    the chunks of the response body. When the body is to be written via
    the WSGI ``write()`` callable, it yields nothing and only records
    that callable for later use.

    Calling this method multiple times is not allowed.

    Raises ``error.ProgrammingError`` if called more than once, if the
    status line or the body is undefined, or if a 304 response carries
    disallowed headers or a non-bytes body.
    """
    if self._started:
        raise error.ProgrammingError('sendresponse() called multiple times')

    self._started = True

    if not self.status:
        raise error.ProgrammingError('status line not defined')

    if (self._bodybytes is None and self._bodygen is None
        and not self._bodywillwrite):
        raise error.ProgrammingError('response body not defined')

    # RFC 7232 Section 4.1 states that a 304 MUST generate one of
    # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
    # and SHOULD NOT generate other headers unless they could be used
    # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
    # states that no response body can be issued. Content-Length can
    # be sent. But if it is present, it should be the size of the response
    # that wasn't transferred.
    if self.status.startswith('304 '):
        # setbodybytes('') will set C-L to 0. This doesn't conform with the
        # spec. So remove it.
        if self.headers.get('Content-Length') == '0':
            del self.headers['Content-Length']

        # Strictly speaking, this is too strict. But until it causes
        # problems, let's be strict.
        badheaders = {k for k in self.headers.keys()
                      if k.lower() not in ('date', 'etag', 'expires',
                                           'cache-control',
                                           'content-location',
                                           'content-security-policy',
                                           'vary')}
        if badheaders:
            raise error.ProgrammingError(
                'illegal header on 304 response: %s' %
                ', '.join(sorted(badheaders)))

        if self._bodygen is not None or self._bodywillwrite:
            raise error.ProgrammingError("must use setbodybytes('') with "
                                         "304 responses")

    # Various HTTP clients (notably httplib) won't read the HTTP response
    # until the HTTP request has been sent in full. If servers (us) send a
    # response before the HTTP request has been fully sent, the connection
    # may deadlock because neither end is reading.
    #
    # We work around this by "draining" the request data before
    # sending any response in some conditions.
    drain = False
    close = False

    # If the client sent Expect: 100-continue, we assume it is smart enough
    # to deal with the server sending a response before reading the request.
    # (httplib doesn't do this.)
    if self._req.headers.get('Expect', '').lower() == '100-continue':
        pass
    # Only tend to request methods that have bodies. Strictly speaking,
    # we should sniff for a body. But this is fine for our existing
    # WSGI applications.
    elif self._req.method not in ('POST', 'PUT'):
        pass
    else:
        # If we don't know how much data to read, there's no guarantee
        # that we can drain the request responsibly. The WSGI
        # specification only says that servers *should* ensure the
        # input stream doesn't overrun the actual request. So there's
        # no guarantee that reading until EOF won't corrupt the stream
        # state.
        if not isinstance(self._req.bodyfh, util.cappedreader):
            close = True
        else:
            # We /could/ only drain certain HTTP response codes. But 200 and
            # non-200 wire protocol responses both require draining. Since
            # we have a capped reader in place for all situations where we
            # drain, it is safe to read from that stream. We'll either do
            # a drain or no-op if we're already at EOF.
            drain = True

    if close:
        self.headers['Connection'] = 'Close'

    if drain:
        assert isinstance(self._req.bodyfh, util.cappedreader)
        # Read and discard whatever is left of the request body.
        while True:
            chunk = self._req.bodyfh.read(32768)
            if not chunk:
                break

    strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
                  k, v in self.headers.items()]
    write = self._startresponse(pycompat.sysstr(self.status),
                                strheaders)

    if self._bodybytes:
        yield self._bodybytes
    elif self._bodygen:
        for chunk in self._bodygen:
            # PEP-3333 says that output must be bytes. And some WSGI
            # implementations enforce this. We cast bytes-like types here
            # for convenience.
            if isinstance(chunk, bytearray):
                chunk = bytes(chunk)

            yield chunk
    elif self._bodywillwrite:
        self._bodywritefn = write
    elif self._bodybytes is None and self._bodygen is None:
        # No body source at all. The validation at the top of this method
        # should make this unreachable; raise rather than silently send
        # nothing if that invariant is ever broken.
        raise error.ProgrammingError('do not know how to send body')
    # Otherwise a body was defined but is empty (e.g. setbodybytes('') on a
    # 304 response): the headers have been sent and there is nothing to
    # yield.
548
554
def getbodyfile(self):
    """Return a file-like writer for the response body.

    This is only usable once ``setbodywillwrite()`` has been called and
    ``sendresponse()`` has been started. ``sendresponse()`` is a
    generator, so its body does not run until the generator is advanced.
    In this mode the generator yields no items; the simplest way to
    consume it is ``list(res.sendresponse())``, which should evaluate to
    an empty list (``[]``).

    Raises ``error.ProgrammingError`` when either precondition is unmet.
    """
    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    if not self._started:
        raise error.ProgrammingError('must call sendresponse() first; did '
                                     'you remember to consume it since it '
                                     'is a generator?')

    # sendresponse() stores the WSGI write callable once it has run far
    # enough; wrap it in the offset-tracking writer for the caller.
    writefn = self._bodywritefn
    assert writefn
    return offsettrackingwriter(writefn)
569
575
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts; a plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application directly.

    ``app_maker`` is called once, immediately, with no arguments. The
    returned function forwards ``(env, respond)`` to the application it
    produced and returns that application's result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        # Thin pass-through to the application created above.
        return wsgiapp(env, respond)

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now