##// END OF EJS Templates
hgweb: put response headers back into str for Python 3...
Augie Fackler -
r37607:e320d940 default
parent child Browse files
Show More
@@ -1,561 +1,563
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import wsgiref.headers as wsgiheaders
11 import wsgiref.headers as wsgiheaders
12 #import wsgiref.validate
12 #import wsgiref.validate
13
13
14 from ..thirdparty import (
14 from ..thirdparty import (
15 attr,
15 attr,
16 )
16 )
17 from .. import (
17 from .. import (
18 error,
18 error,
19 pycompat,
19 pycompat,
20 util,
20 util,
21 )
21 )
22
22
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.
    """
    def __init__(self):
        # Maps each key to the list of values recorded for it, in the
        # order they were added.
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key is not defined.
        """
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key."""
        del self._items[key]

    def __contains__(self, key):
        return key in self._items

    def __len__(self):
        """Returns the number of distinct keys (not the number of values)."""
        return len(self._items)

    def get(self, key, default=None):
        """Returns the last set value for a key, or ``default`` if unset."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key.

        Returns an empty list if the key is not defined.
        """
        return self._items.get(key, [])

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            # Wrap the key in a 1-tuple so that tuple keys are formatted
            # instead of being unpacked as format arguments.
            raise KeyError('multiple values for %r' % (key,))

        return vals[0]

    def asdictoflists(self):
        """Returns a new dict mapping each key to a copy of its value list."""
        # ``items()`` is available on both Python 2 and Python 3, whereas
        # ``iteritems()`` only exists on Python 2.
        return {k: list(v) for k, v in self._items.items()}
79
79
@attr.s(frozen=True)
class parsedrequest(object):
    """Immutable description of a parsed WSGI request.

    Carries the parsed request parameters together with a handle on the
    request body input stream.
    """

    # HTTP request method.
    method = attr.ib()
    # Complete URL of the request.
    url = attr.ib()
    # URL with no path components: just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised counterparts of ``url`` and ``baseurl``. These use
    # SERVER_NAME rather than the HTTP Host header for the hostname and
    # are likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # Scheme of the URL (the part before ``://``), e.g. ``http`` or
    # ``https``.
    urlscheme = attr.ib()
    # REMOTE_USER value if set, otherwise None.
    remoteuser = attr.ib()
    # REMOTE_HOST value if set, otherwise None.
    remotehost = attr.ib()
    # Relative path of the WSGI application. Begins with ``/`` when
    # defined.
    apppath = attr.ib()
    # Path parts (as a list) used for dispatch.
    dispatchparts = attr.ib()
    # Path component of the URL (without query string) used for dispatch.
    # ``None`` signals that the request carried no path component, the
    # empty string signals a request to the application's root URL, and
    # any other value is the requested path under the application, not
    # beginning with ``/``.
    dispatchpath = attr.ib()
    # Name of the repository being accessed.
    reponame = attr.ib()
    # Raw query string (the part of the URL after "?").
    querystring = attr.ib()
    # Query string parameters, as a multidict.
    qsparams = attr.ib()
    # Request headers as a wsgiref.headers.Headers instance. Behaves like
    # a dict whose keys are case insensitive.
    headers = attr.ib()
    # Input stream for the request body.
    bodyfh = attr.ib()
    # The WSGI environment dict, unmodified.
    rawenv = attr.ib()
127
127
def parserequestfromenv(env, reponame=None, altbaseurl=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` if ``reponame`` is given but
    ``PATH_INFO`` is missing or empty, does not begin with the repo name, or
    the repo name prefix does not end at a path delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    # Non-string values (e.g. ``wsgi.input``) are passed through untouched.
    if pycompat.ispy3:
        env = {k.encode('latin-1'):
               v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + '://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Only spell out the port when it is not the default for the scheme.
        defaultport = '443' if s.startswith('https://') else '80'
        if port != defaultport:
            s += ':' + port

        return s

    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl, env['SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # Prefer an explicit port, then the default for the scheme, and
        # only then fall back to the port the server is listening on.
        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == 'http':
            port = '80'
        elif altbaseurl.scheme == 'https':
            port = '443'
        else:
            port = env['SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env['SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    # The application path comes from ``altbaseurl`` when given and from
    # SCRIPT_NAME otherwise. It is used both for the advertised URL and
    # for the ``apppath`` attribute of the result.
    if altbaseurl:
        apppath = altbaseurl.path or ''
        if apppath and not apppath.startswith('/'):
            apppath = '/' + apppath
    else:
        apppath = env.get('SCRIPT_NAME', '')

    advertisedfullurl += util.urlreq.quote(apppath)
    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if reponame:
        repoprefix = '/' + reponame.strip('/')

        if not env.get('PATH_INFO'):
            raise error.ProgrammingError('reponame requires PATH_INFO')

        if not env['PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                         'name: %s (%s)' % (env['PATH_INFO'],
                                                            reponame))

        dispatchpath = env['PATH_INFO'][len(repoprefix):]

        if dispatchpath and not dispatchpath.startswith('/'):
            raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                         'not end at path delimiter: %s (%s)' %
                                         (env['PATH_INFO'], reponame))

        apppath = apppath.rstrip('/') + repoprefix
        dispatchparts = dispatchpath.strip('/').split('/')
        dispatchpath = '/'.join(dispatchparts)

    elif 'PATH_INFO' in env:
        if env['PATH_INFO'].strip('/'):
            dispatchparts = env['PATH_INFO'].strip('/').split('/')
            dispatchpath = '/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = ''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get('QUERY_STRING', '')

    # A multidict keeps every value given for a repeated parameter, in the
    # order the values appeared in the query string.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = wsgiheaders.Headers(
        [(k[len('HTTP_'):].replace('_', '-'), v)
         for k, v in env.items()
         if k.startswith('HTTP_')])

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
        headers['Content-Type'] = env['CONTENT_TYPE']

    # When the body length is known, cap reads at that length.
    bodyfh = env['wsgi.input']
    if 'Content-Length' in headers:
        bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=env['wsgi.url_scheme'],
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh,
                         rawenv=env)
323
323
class offsettrackingwriter(object):
    """Append-only, file-like wrapper that counts what has been written.

    Each instance is bound to a callable, which is invoked with the data
    every time ``write()`` is attempted.

    The running total of written data is kept so that ``tell()`` can be
    answered.

    This exists to wrap the ``write()`` function handed back by a WSGI
    ``start_response()`` call. That ``write()`` is a bare callable rather
    than a file object, so it lacks the other file object methods.
    """
    def __init__(self, writefn):
        self._write = writefn
        self._offset = 0

    def write(self, s):
        written = self._write(s)
        # Not every write callable reports how much it wrote; when it
        # returns None, count the length of the data handed to it.
        self._offset += len(s) if written is None else written

    def flush(self):
        # Nothing is buffered here, so there is nothing to flush.
        pass

    def tell(self):
        return self._offset
354
354
355 class wsgiresponse(object):
355 class wsgiresponse(object):
356 """Represents a response to a WSGI request.
356 """Represents a response to a WSGI request.
357
357
358 A response consists of a status line, headers, and a body.
358 A response consists of a status line, headers, and a body.
359
359
360 Consumers must populate the ``status`` and ``headers`` fields and
360 Consumers must populate the ``status`` and ``headers`` fields and
361 make a call to a ``setbody*()`` method before the response can be
361 make a call to a ``setbody*()`` method before the response can be
362 issued.
362 issued.
363
363
364 When it is time to start sending the response over the wire,
364 When it is time to start sending the response over the wire,
365 ``sendresponse()`` is called. It handles emitting the header portion
365 ``sendresponse()`` is called. It handles emitting the header portion
366 of the response message. It then yields chunks of body data to be
366 of the response message. It then yields chunks of body data to be
367 written to the peer. Typically, the WSGI application itself calls
367 written to the peer. Typically, the WSGI application itself calls
368 and returns the value from ``sendresponse()``.
368 and returns the value from ``sendresponse()``.
369 """
369 """
370
370
371 def __init__(self, req, startresponse):
371 def __init__(self, req, startresponse):
372 """Create an empty response tied to a specific request.
372 """Create an empty response tied to a specific request.
373
373
374 ``req`` is a ``parsedrequest``. ``startresponse`` is the
374 ``req`` is a ``parsedrequest``. ``startresponse`` is the
375 ``start_response`` function passed to the WSGI application.
375 ``start_response`` function passed to the WSGI application.
376 """
376 """
377 self._req = req
377 self._req = req
378 self._startresponse = startresponse
378 self._startresponse = startresponse
379
379
380 self.status = None
380 self.status = None
381 self.headers = wsgiheaders.Headers([])
381 self.headers = wsgiheaders.Headers([])
382
382
383 self._bodybytes = None
383 self._bodybytes = None
384 self._bodygen = None
384 self._bodygen = None
385 self._bodywillwrite = False
385 self._bodywillwrite = False
386 self._started = False
386 self._started = False
387 self._bodywritefn = None
387 self._bodywritefn = None
388
388
389 def _verifybody(self):
389 def _verifybody(self):
390 if (self._bodybytes is not None or self._bodygen is not None
390 if (self._bodybytes is not None or self._bodygen is not None
391 or self._bodywillwrite):
391 or self._bodywillwrite):
392 raise error.ProgrammingError('cannot define body multiple times')
392 raise error.ProgrammingError('cannot define body multiple times')
393
393
394 def setbodybytes(self, b):
394 def setbodybytes(self, b):
395 """Define the response body as static bytes.
395 """Define the response body as static bytes.
396
396
397 The empty string signals that there is no response body.
397 The empty string signals that there is no response body.
398 """
398 """
399 self._verifybody()
399 self._verifybody()
400 self._bodybytes = b
400 self._bodybytes = b
401 self.headers['Content-Length'] = '%d' % len(b)
401 self.headers['Content-Length'] = '%d' % len(b)
402
402
403 def setbodygen(self, gen):
403 def setbodygen(self, gen):
404 """Define the response body as a generator of bytes."""
404 """Define the response body as a generator of bytes."""
405 self._verifybody()
405 self._verifybody()
406 self._bodygen = gen
406 self._bodygen = gen
407
407
408 def setbodywillwrite(self):
408 def setbodywillwrite(self):
409 """Signal an intent to use write() to emit the response body.
409 """Signal an intent to use write() to emit the response body.
410
410
411 **This is the least preferred way to send a body.**
411 **This is the least preferred way to send a body.**
412
412
413 It is preferred for WSGI applications to emit a generator of chunks
413 It is preferred for WSGI applications to emit a generator of chunks
414 constituting the response body. However, some consumers can't emit
414 constituting the response body. However, some consumers can't emit
415 data this way. So, WSGI provides a way to obtain a ``write(data)``
415 data this way. So, WSGI provides a way to obtain a ``write(data)``
416 function that can be used to synchronously perform an unbuffered
416 function that can be used to synchronously perform an unbuffered
417 write.
417 write.
418
418
419 Calling this function signals an intent to produce the body in this
419 Calling this function signals an intent to produce the body in this
420 manner.
420 manner.
421 """
421 """
422 self._verifybody()
422 self._verifybody()
423 self._bodywillwrite = True
423 self._bodywillwrite = True
424
424
425 def sendresponse(self):
425 def sendresponse(self):
426 """Send the generated response to the client.
426 """Send the generated response to the client.
427
427
428 Before this is called, ``status`` must be set and one of
428 Before this is called, ``status`` must be set and one of
429 ``setbodybytes()`` or ``setbodygen()`` must be called.
429 ``setbodybytes()`` or ``setbodygen()`` must be called.
430
430
431 Calling this method multiple times is not allowed.
431 Calling this method multiple times is not allowed.
432 """
432 """
433 if self._started:
433 if self._started:
434 raise error.ProgrammingError('sendresponse() called multiple times')
434 raise error.ProgrammingError('sendresponse() called multiple times')
435
435
436 self._started = True
436 self._started = True
437
437
438 if not self.status:
438 if not self.status:
439 raise error.ProgrammingError('status line not defined')
439 raise error.ProgrammingError('status line not defined')
440
440
441 if (self._bodybytes is None and self._bodygen is None
441 if (self._bodybytes is None and self._bodygen is None
442 and not self._bodywillwrite):
442 and not self._bodywillwrite):
443 raise error.ProgrammingError('response body not defined')
443 raise error.ProgrammingError('response body not defined')
444
444
445 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
445 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
446 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
446 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
447 # and SHOULD NOT generate other headers unless they could be used
447 # and SHOULD NOT generate other headers unless they could be used
448 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
448 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
449 # states that no response body can be issued. Content-Length can
449 # states that no response body can be issued. Content-Length can
450 # be sent. But if it is present, it should be the size of the response
450 # be sent. But if it is present, it should be the size of the response
451 # that wasn't transferred.
451 # that wasn't transferred.
452 if self.status.startswith('304 '):
452 if self.status.startswith('304 '):
453 # setbodybytes('') will set C-L to 0. This doesn't conform with the
453 # setbodybytes('') will set C-L to 0. This doesn't conform with the
454 # spec. So remove it.
454 # spec. So remove it.
455 if self.headers.get('Content-Length') == '0':
455 if self.headers.get('Content-Length') == '0':
456 del self.headers['Content-Length']
456 del self.headers['Content-Length']
457
457
458 # Strictly speaking, this is too strict. But until it causes
458 # Strictly speaking, this is too strict. But until it causes
459 # problems, let's be strict.
459 # problems, let's be strict.
460 badheaders = {k for k in self.headers.keys()
460 badheaders = {k for k in self.headers.keys()
461 if k.lower() not in ('date', 'etag', 'expires',
461 if k.lower() not in ('date', 'etag', 'expires',
462 'cache-control',
462 'cache-control',
463 'content-location',
463 'content-location',
464 'vary')}
464 'vary')}
465 if badheaders:
465 if badheaders:
466 raise error.ProgrammingError(
466 raise error.ProgrammingError(
467 'illegal header on 304 response: %s' %
467 'illegal header on 304 response: %s' %
468 ', '.join(sorted(badheaders)))
468 ', '.join(sorted(badheaders)))
469
469
470 if self._bodygen is not None or self._bodywillwrite:
470 if self._bodygen is not None or self._bodywillwrite:
471 raise error.ProgrammingError("must use setbodybytes('') with "
471 raise error.ProgrammingError("must use setbodybytes('') with "
472 "304 responses")
472 "304 responses")
473
473
474 # Various HTTP clients (notably httplib) won't read the HTTP response
474 # Various HTTP clients (notably httplib) won't read the HTTP response
475 # until the HTTP request has been sent in full. If servers (us) send a
475 # until the HTTP request has been sent in full. If servers (us) send a
476 # response before the HTTP request has been fully sent, the connection
476 # response before the HTTP request has been fully sent, the connection
477 # may deadlock because neither end is reading.
477 # may deadlock because neither end is reading.
478 #
478 #
479 # We work around this by "draining" the request data before
479 # We work around this by "draining" the request data before
480 # sending any response in some conditions.
480 # sending any response in some conditions.
481 drain = False
481 drain = False
482 close = False
482 close = False
483
483
484 # If the client sent Expect: 100-continue, we assume it is smart enough
484 # If the client sent Expect: 100-continue, we assume it is smart enough
485 # to deal with the server sending a response before reading the request.
485 # to deal with the server sending a response before reading the request.
486 # (httplib doesn't do this.)
486 # (httplib doesn't do this.)
487 if self._req.headers.get('Expect', '').lower() == '100-continue':
487 if self._req.headers.get('Expect', '').lower() == '100-continue':
488 pass
488 pass
489 # Only tend to request methods that have bodies. Strictly speaking,
489 # Only tend to request methods that have bodies. Strictly speaking,
490 # we should sniff for a body. But this is fine for our existing
490 # we should sniff for a body. But this is fine for our existing
491 # WSGI applications.
491 # WSGI applications.
492 elif self._req.method not in ('POST', 'PUT'):
492 elif self._req.method not in ('POST', 'PUT'):
493 pass
493 pass
494 else:
494 else:
495 # If we don't know how much data to read, there's no guarantee
495 # If we don't know how much data to read, there's no guarantee
496 # that we can drain the request responsibly. The WSGI
496 # that we can drain the request responsibly. The WSGI
497 # specification only says that servers *should* ensure the
497 # specification only says that servers *should* ensure the
498 # input stream doesn't overrun the actual request. So there's
498 # input stream doesn't overrun the actual request. So there's
499 # no guarantee that reading until EOF won't corrupt the stream
499 # no guarantee that reading until EOF won't corrupt the stream
500 # state.
500 # state.
501 if not isinstance(self._req.bodyfh, util.cappedreader):
501 if not isinstance(self._req.bodyfh, util.cappedreader):
502 close = True
502 close = True
503 else:
503 else:
504 # We /could/ only drain certain HTTP response codes. But 200 and
504 # We /could/ only drain certain HTTP response codes. But 200 and
505 # non-200 wire protocol responses both require draining. Since
505 # non-200 wire protocol responses both require draining. Since
506 # we have a capped reader in place for all situations where we
506 # we have a capped reader in place for all situations where we
507 # drain, it is safe to read from that stream. We'll either do
507 # drain, it is safe to read from that stream. We'll either do
508 # a drain or no-op if we're already at EOF.
508 # a drain or no-op if we're already at EOF.
509 drain = True
509 drain = True
510
510
511 if close:
511 if close:
512 self.headers['Connection'] = 'Close'
512 self.headers['Connection'] = 'Close'
513
513
514 if drain:
514 if drain:
515 assert isinstance(self._req.bodyfh, util.cappedreader)
515 assert isinstance(self._req.bodyfh, util.cappedreader)
516 while True:
516 while True:
517 chunk = self._req.bodyfh.read(32768)
517 chunk = self._req.bodyfh.read(32768)
518 if not chunk:
518 if not chunk:
519 break
519 break
520
520
521 strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
522 k, v in self.headers.items()]
521 write = self._startresponse(pycompat.sysstr(self.status),
523 write = self._startresponse(pycompat.sysstr(self.status),
522 self.headers.items())
524 strheaders)
523
525
524 if self._bodybytes:
526 if self._bodybytes:
525 yield self._bodybytes
527 yield self._bodybytes
526 elif self._bodygen:
528 elif self._bodygen:
527 for chunk in self._bodygen:
529 for chunk in self._bodygen:
528 yield chunk
530 yield chunk
529 elif self._bodywillwrite:
531 elif self._bodywillwrite:
530 self._bodywritefn = write
532 self._bodywritefn = write
531 else:
533 else:
532 error.ProgrammingError('do not know how to send body')
534 error.ProgrammingError('do not know how to send body')
533
535
def getbodyfile(self):
    """Return a file-like writer used to emit the response body.

    Only usable after ``setbodywillwrite()`` has been called and
    ``sendresponse()`` has been started. ``sendresponse()`` is a
    generator, so it performs no work until it is advanced. In this mode
    it yields no items, which makes ``list(res.sendresponse())``
    (evaluating to ``[]``) the simplest way to drive it.

    Raises ``error.ProgrammingError`` when either precondition is unmet.
    """
    if self._bodywillwrite and self._started:
        # Both preconditions hold; the write callable is expected to have
        # been recorded already.
        writefn = self._bodywritefn
        assert writefn
        return offsettrackingwriter(writefn)

    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    raise error.ProgrammingError(
        'must call sendresponse() first; did you remember to consume it '
        'since it is a generator?')
554
556
def wsgiapplication(app_maker):
    """Adapt an application factory into a WSGI callable.

    Kept for compatibility with old CGI scripts; a plain hgweb() or
    hgwebdir() can and should now be used as the WSGI application.

    ``app_maker`` is called once, immediately, with no arguments. The
    returned function forwards ``(env, respond)`` to the object it
    produced and returns that call's result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        return wsgiapp(env, respond)

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now