##// END OF EJS Templates
hgweb: also set Content-Type header...
Gregory Szorc -
r37067:55e90139 default
parent child Browse files
Show More
@@ -1,558 +1,561
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import wsgiref.headers as wsgiheaders
11 import wsgiref.headers as wsgiheaders
12 #import wsgiref.validate
12 #import wsgiref.validate
13
13
14 from ..thirdparty import (
14 from ..thirdparty import (
15 attr,
15 attr,
16 )
16 )
17 from .. import (
17 from .. import (
18 error,
18 error,
19 pycompat,
19 pycompat,
20 util,
20 util,
21 )
21 )
22
22
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.
    """
    def __init__(self):
        # Maps each key to the list of values recorded for it, in the
        # order they were added.
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key is not defined.
        """
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key."""
        del self._items[key]

    def __contains__(self, key):
        """Whether at least one value is stored for the key."""
        return key in self._items

    def __len__(self):
        """Number of distinct keys (not the number of values)."""
        return len(self._items)

    def get(self, key, default=None):
        """Return the last set value for a key, or ``default`` if missing."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key.

        Returns the internally stored list (not a copy) when the key is
        defined and a new empty list otherwise.
        """
        return self._items.get(key, [])

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            raise KeyError('multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Return a new dict mapping each key to a copy of its value list."""
        # ``items()`` is available on both Python 2 and Python 3 dicts,
        # whereas ``iteritems()`` only exists on Python 2.
        return {k: list(v) for k, v in self._items.items()}
79
79
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request.

    Contains both parsed parameters as well as a handle on the input stream.
    Instances are frozen: attributes are set once at construction time.
    """

    # --- Method and URLs -------------------------------------------------

    # HTTP request method.
    method = attr.ib()
    # Complete URL of the request.
    url = attr.ib()
    # ``url`` stripped of all path components: <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised counterparts of ``url`` and ``baseurl``. These use
    # SERVER_NAME rather than the HTTP ``Host`` header for the hostname
    # and are likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # Scheme of the URL, i.e. the part before ``://`` (``http``, ``https``).
    urlscheme = attr.ib()

    # --- Remote peer -----------------------------------------------------

    # REMOTE_USER value, or None when not set.
    remoteuser = attr.ib()
    # REMOTE_HOST value, or None when not set.
    remotehost = attr.ib()

    # --- Application path and dispatch -----------------------------------

    # Relative path of the WSGI application. Begins with ``/`` when
    # defined.
    apppath = attr.ib()
    # Path parts (a list) used for dispatching.
    dispatchparts = attr.ib()
    # Path component of the URL (without query string) used for
    # dispatching. One of:
    #   * ``None`` - the request carried no path component;
    #   * empty string - the request targets the application's root URL;
    #   * a string not starting with ``/`` - the requested path below the
    #     application.
    dispatchpath = attr.ib()
    # Name of the repository being accessed.
    reponame = attr.ib()

    # --- Query string ----------------------------------------------------

    # Unparsed query string (the part of the URL after "?").
    querystring = attr.ib()
    # Query string parameters as a multidict.
    qsparams = attr.ib()

    # --- Headers, body and raw environment -------------------------------

    # Request headers as a wsgiref.headers.Headers instance, which behaves
    # like a dict whose keys are case insensitive.
    headers = attr.ib()
    # Input stream for the request body.
    bodyfh = attr.ib()
    # The WSGI environment dict the request was parsed from.
    rawenv = attr.ib()
127
127
def parserequestfromenv(env, reponame=None, altbaseurl=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` if ``reponame`` is given and
    ``PATH_INFO`` is missing or empty, does not start with the repo name,
    or the repo name prefix does not end at a path delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        # ``items()`` rather than ``iteritems()``: the latter does not exist
        # on Python 3 dicts, which is the only case this branch handles.
        env = {k.encode('latin-1'):
               v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    scriptname = env.get('SCRIPT_NAME', '')
    pathinfo = env.get('PATH_INFO', '')

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + '://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        """Append ``:port`` to ``s`` unless it is the scheme's default."""
        if s.startswith('https://'):
            if port != '443':
                s += ':' + port
        else:
            if port != '80':
                s += ':' + port

        return s

    # The request URL prefers the Host header; the port is only appended
    # when falling back to SERVER_NAME.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl, env['SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # An explicit port wins; otherwise use the default port of the
        # alternate URL's scheme, falling back to the server's port.
        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == 'http':
            port = '80'
        elif altbaseurl.scheme == 'https':
            port = '443'
        else:
            port = env['SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env['SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    # The application path comes from the alternate base URL when one is
    # given and from SCRIPT_NAME otherwise. It is used both for the
    # advertised URL and (possibly extended by the repo name) as ``apppath``.
    if altbaseurl:
        apppath = altbaseurl.path or ''
        if apppath and not apppath.startswith('/'):
            apppath = '/' + apppath
    else:
        apppath = scriptname

    fullurl += util.urlreq.quote(scriptname)
    fullurl += util.urlreq.quote(pathinfo)

    advertisedfullurl += util.urlreq.quote(apppath)
    advertisedfullurl += util.urlreq.quote(pathinfo)

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if reponame:
        repoprefix = '/' + reponame.strip('/')

        if not env.get('PATH_INFO'):
            raise error.ProgrammingError('reponame requires PATH_INFO')

        if not env['PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                         'name: %s (%s)' % (env['PATH_INFO'],
                                                            reponame))

        dispatchpath = env['PATH_INFO'][len(repoprefix):]

        if dispatchpath and not dispatchpath.startswith('/'):
            raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                         'not end at path delimiter: %s (%s)' %
                                         (env['PATH_INFO'], reponame))

        apppath = apppath.rstrip('/') + repoprefix
        dispatchparts = dispatchpath.strip('/').split('/')
        dispatchpath = '/'.join(dispatchparts)

    elif 'PATH_INFO' in env:
        if env['PATH_INFO'].strip('/'):
            dispatchparts = env['PATH_INFO'].strip('/').split('/')
            dispatchpath = '/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = ''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get('QUERY_STRING', '')

    # The multidict keeps every value given for a key, in order, while
    # still offering fast lookup by key.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.items():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
        headers['Content-Type'] = env['CONTENT_TYPE']

    # Cap reads of the body at the declared length when one is known.
    bodyfh = env['wsgi.input']
    if 'Content-Length' in headers:
        bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))

    # Note: on Python 3, ``rawenv`` receives the bytes-converted copy of
    # the environment built above.
    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=env['wsgi.url_scheme'],
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh,
                         rawenv=env)
320
323
class offsettrackingwriter(object):
    """Append-only, file-like wrapper around a write callable.

    An instance is bound to a callable which receives the data of every
    ``write()`` call. The instance keeps a running total of the amount
    written so that ``tell()`` can be answered.

    The purpose is to wrap the ``write()`` function handed back by a WSGI
    ``start_response()`` function: that function is a bare callable rather
    than a file object, so it lacks the other file object methods.
    """
    def __init__(self, writefn):
        self._offset = 0
        self._write = writefn

    def write(self, s):
        """Forward ``s`` to the bound callable and advance the offset."""
        written = self._write(s)
        # Not every callable reports how much it wrote; when it returns
        # None, count the whole input as written.
        self._offset += len(s) if written is None else written

    def flush(self):
        """No-op; present for file object compatibility."""
        return None

    def tell(self):
        """Return the total amount written so far."""
        return self._offset
351
354
352 class wsgiresponse(object):
355 class wsgiresponse(object):
353 """Represents a response to a WSGI request.
356 """Represents a response to a WSGI request.
354
357
355 A response consists of a status line, headers, and a body.
358 A response consists of a status line, headers, and a body.
356
359
357 Consumers must populate the ``status`` and ``headers`` fields and
360 Consumers must populate the ``status`` and ``headers`` fields and
358 make a call to a ``setbody*()`` method before the response can be
361 make a call to a ``setbody*()`` method before the response can be
359 issued.
362 issued.
360
363
361 When it is time to start sending the response over the wire,
364 When it is time to start sending the response over the wire,
362 ``sendresponse()`` is called. It handles emitting the header portion
365 ``sendresponse()`` is called. It handles emitting the header portion
363 of the response message. It then yields chunks of body data to be
366 of the response message. It then yields chunks of body data to be
364 written to the peer. Typically, the WSGI application itself calls
367 written to the peer. Typically, the WSGI application itself calls
365 and returns the value from ``sendresponse()``.
368 and returns the value from ``sendresponse()``.
366 """
369 """
367
370
368 def __init__(self, req, startresponse):
371 def __init__(self, req, startresponse):
369 """Create an empty response tied to a specific request.
372 """Create an empty response tied to a specific request.
370
373
371 ``req`` is a ``parsedrequest``. ``startresponse`` is the
374 ``req`` is a ``parsedrequest``. ``startresponse`` is the
372 ``start_response`` function passed to the WSGI application.
375 ``start_response`` function passed to the WSGI application.
373 """
376 """
374 self._req = req
377 self._req = req
375 self._startresponse = startresponse
378 self._startresponse = startresponse
376
379
377 self.status = None
380 self.status = None
378 self.headers = wsgiheaders.Headers([])
381 self.headers = wsgiheaders.Headers([])
379
382
380 self._bodybytes = None
383 self._bodybytes = None
381 self._bodygen = None
384 self._bodygen = None
382 self._bodywillwrite = False
385 self._bodywillwrite = False
383 self._started = False
386 self._started = False
384 self._bodywritefn = None
387 self._bodywritefn = None
385
388
386 def _verifybody(self):
389 def _verifybody(self):
387 if (self._bodybytes is not None or self._bodygen is not None
390 if (self._bodybytes is not None or self._bodygen is not None
388 or self._bodywillwrite):
391 or self._bodywillwrite):
389 raise error.ProgrammingError('cannot define body multiple times')
392 raise error.ProgrammingError('cannot define body multiple times')
390
393
391 def setbodybytes(self, b):
394 def setbodybytes(self, b):
392 """Define the response body as static bytes.
395 """Define the response body as static bytes.
393
396
394 The empty string signals that there is no response body.
397 The empty string signals that there is no response body.
395 """
398 """
396 self._verifybody()
399 self._verifybody()
397 self._bodybytes = b
400 self._bodybytes = b
398 self.headers['Content-Length'] = '%d' % len(b)
401 self.headers['Content-Length'] = '%d' % len(b)
399
402
400 def setbodygen(self, gen):
403 def setbodygen(self, gen):
401 """Define the response body as a generator of bytes."""
404 """Define the response body as a generator of bytes."""
402 self._verifybody()
405 self._verifybody()
403 self._bodygen = gen
406 self._bodygen = gen
404
407
405 def setbodywillwrite(self):
408 def setbodywillwrite(self):
406 """Signal an intent to use write() to emit the response body.
409 """Signal an intent to use write() to emit the response body.
407
410
408 **This is the least preferred way to send a body.**
411 **This is the least preferred way to send a body.**
409
412
410 It is preferred for WSGI applications to emit a generator of chunks
413 It is preferred for WSGI applications to emit a generator of chunks
411 constituting the response body. However, some consumers can't emit
414 constituting the response body. However, some consumers can't emit
412 data this way. So, WSGI provides a way to obtain a ``write(data)``
415 data this way. So, WSGI provides a way to obtain a ``write(data)``
413 function that can be used to synchronously perform an unbuffered
416 function that can be used to synchronously perform an unbuffered
414 write.
417 write.
415
418
416 Calling this function signals an intent to produce the body in this
419 Calling this function signals an intent to produce the body in this
417 manner.
420 manner.
418 """
421 """
419 self._verifybody()
422 self._verifybody()
420 self._bodywillwrite = True
423 self._bodywillwrite = True
421
424
422 def sendresponse(self):
425 def sendresponse(self):
423 """Send the generated response to the client.
426 """Send the generated response to the client.
424
427
425 Before this is called, ``status`` must be set and one of
428 Before this is called, ``status`` must be set and one of
426 ``setbodybytes()`` or ``setbodygen()`` must be called.
429 ``setbodybytes()`` or ``setbodygen()`` must be called.
427
430
428 Calling this method multiple times is not allowed.
431 Calling this method multiple times is not allowed.
429 """
432 """
430 if self._started:
433 if self._started:
431 raise error.ProgrammingError('sendresponse() called multiple times')
434 raise error.ProgrammingError('sendresponse() called multiple times')
432
435
433 self._started = True
436 self._started = True
434
437
435 if not self.status:
438 if not self.status:
436 raise error.ProgrammingError('status line not defined')
439 raise error.ProgrammingError('status line not defined')
437
440
438 if (self._bodybytes is None and self._bodygen is None
441 if (self._bodybytes is None and self._bodygen is None
439 and not self._bodywillwrite):
442 and not self._bodywillwrite):
440 raise error.ProgrammingError('response body not defined')
443 raise error.ProgrammingError('response body not defined')
441
444
442 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
445 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
443 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
446 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
444 # and SHOULD NOT generate other headers unless they could be used
447 # and SHOULD NOT generate other headers unless they could be used
445 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
448 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
446 # states that no response body can be issued. Content-Length can
449 # states that no response body can be issued. Content-Length can
447 # be sent. But if it is present, it should be the size of the response
450 # be sent. But if it is present, it should be the size of the response
448 # that wasn't transferred.
451 # that wasn't transferred.
449 if self.status.startswith('304 '):
452 if self.status.startswith('304 '):
450 # setbodybytes('') will set C-L to 0. This doesn't conform with the
453 # setbodybytes('') will set C-L to 0. This doesn't conform with the
451 # spec. So remove it.
454 # spec. So remove it.
452 if self.headers.get('Content-Length') == '0':
455 if self.headers.get('Content-Length') == '0':
453 del self.headers['Content-Length']
456 del self.headers['Content-Length']
454
457
455 # Strictly speaking, this is too strict. But until it causes
458 # Strictly speaking, this is too strict. But until it causes
456 # problems, let's be strict.
459 # problems, let's be strict.
457 badheaders = {k for k in self.headers.keys()
460 badheaders = {k for k in self.headers.keys()
458 if k.lower() not in ('date', 'etag', 'expires',
461 if k.lower() not in ('date', 'etag', 'expires',
459 'cache-control',
462 'cache-control',
460 'content-location',
463 'content-location',
461 'vary')}
464 'vary')}
462 if badheaders:
465 if badheaders:
463 raise error.ProgrammingError(
466 raise error.ProgrammingError(
464 'illegal header on 304 response: %s' %
467 'illegal header on 304 response: %s' %
465 ', '.join(sorted(badheaders)))
468 ', '.join(sorted(badheaders)))
466
469
467 if self._bodygen is not None or self._bodywillwrite:
470 if self._bodygen is not None or self._bodywillwrite:
468 raise error.ProgrammingError("must use setbodybytes('') with "
471 raise error.ProgrammingError("must use setbodybytes('') with "
469 "304 responses")
472 "304 responses")
470
473
471 # Various HTTP clients (notably httplib) won't read the HTTP response
474 # Various HTTP clients (notably httplib) won't read the HTTP response
472 # until the HTTP request has been sent in full. If servers (us) send a
475 # until the HTTP request has been sent in full. If servers (us) send a
473 # response before the HTTP request has been fully sent, the connection
476 # response before the HTTP request has been fully sent, the connection
474 # may deadlock because neither end is reading.
477 # may deadlock because neither end is reading.
475 #
478 #
476 # We work around this by "draining" the request data before
479 # We work around this by "draining" the request data before
477 # sending any response in some conditions.
480 # sending any response in some conditions.
478 drain = False
481 drain = False
479 close = False
482 close = False
480
483
481 # If the client sent Expect: 100-continue, we assume it is smart enough
484 # If the client sent Expect: 100-continue, we assume it is smart enough
482 # to deal with the server sending a response before reading the request.
485 # to deal with the server sending a response before reading the request.
483 # (httplib doesn't do this.)
486 # (httplib doesn't do this.)
484 if self._req.headers.get('Expect', '').lower() == '100-continue':
487 if self._req.headers.get('Expect', '').lower() == '100-continue':
485 pass
488 pass
486 # Only tend to request methods that have bodies. Strictly speaking,
489 # Only tend to request methods that have bodies. Strictly speaking,
487 # we should sniff for a body. But this is fine for our existing
490 # we should sniff for a body. But this is fine for our existing
488 # WSGI applications.
491 # WSGI applications.
489 elif self._req.method not in ('POST', 'PUT'):
492 elif self._req.method not in ('POST', 'PUT'):
490 pass
493 pass
491 else:
494 else:
492 # If we don't know how much data to read, there's no guarantee
495 # If we don't know how much data to read, there's no guarantee
493 # that we can drain the request responsibly. The WSGI
496 # that we can drain the request responsibly. The WSGI
494 # specification only says that servers *should* ensure the
497 # specification only says that servers *should* ensure the
495 # input stream doesn't overrun the actual request. So there's
498 # input stream doesn't overrun the actual request. So there's
496 # no guarantee that reading until EOF won't corrupt the stream
499 # no guarantee that reading until EOF won't corrupt the stream
497 # state.
500 # state.
498 if not isinstance(self._req.bodyfh, util.cappedreader):
501 if not isinstance(self._req.bodyfh, util.cappedreader):
499 close = True
502 close = True
500 else:
503 else:
501 # We /could/ only drain certain HTTP response codes. But 200 and
504 # We /could/ only drain certain HTTP response codes. But 200 and
502 # non-200 wire protocol responses both require draining. Since
505 # non-200 wire protocol responses both require draining. Since
503 # we have a capped reader in place for all situations where we
506 # we have a capped reader in place for all situations where we
504 # drain, it is safe to read from that stream. We'll either do
507 # drain, it is safe to read from that stream. We'll either do
505 # a drain or no-op if we're already at EOF.
508 # a drain or no-op if we're already at EOF.
506 drain = True
509 drain = True
507
510
508 if close:
511 if close:
509 self.headers['Connection'] = 'Close'
512 self.headers['Connection'] = 'Close'
510
513
511 if drain:
514 if drain:
512 assert isinstance(self._req.bodyfh, util.cappedreader)
515 assert isinstance(self._req.bodyfh, util.cappedreader)
513 while True:
516 while True:
514 chunk = self._req.bodyfh.read(32768)
517 chunk = self._req.bodyfh.read(32768)
515 if not chunk:
518 if not chunk:
516 break
519 break
517
520
518 write = self._startresponse(pycompat.sysstr(self.status),
521 write = self._startresponse(pycompat.sysstr(self.status),
519 self.headers.items())
522 self.headers.items())
520
523
521 if self._bodybytes:
524 if self._bodybytes:
522 yield self._bodybytes
525 yield self._bodybytes
523 elif self._bodygen:
526 elif self._bodygen:
524 for chunk in self._bodygen:
527 for chunk in self._bodygen:
525 yield chunk
528 yield chunk
526 elif self._bodywillwrite:
529 elif self._bodywillwrite:
527 self._bodywritefn = write
530 self._bodywritefn = write
528 else:
531 else:
529 error.ProgrammingError('do not know how to send body')
532 error.ProgrammingError('do not know how to send body')
530
533
def getbodyfile(self):
    """Return a file-like writer for streaming the response body.

    This is only usable once ``setbodywillwrite()`` has been called and
    ``sendresponse()`` has been run. ``sendresponse()`` is a generator, so
    it does not run to completion until it is consumed. In this mode the
    generator yields no items, so the simplest way to drive it is
    ``list(res.sendresponse())``, which should evaluate to ``[]``.

    Raises ``error.ProgrammingError`` if ``setbodywillwrite()`` was not
    called, or if the response has not been started yet.
    """
    # The writer only makes sense when the caller opted into write mode.
    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    # The write callable is only available once the response has started.
    if not self._started:
        notstarted = ('must call sendresponse() first; did you remember '
                      'to consume it since it is a generator?')
        raise error.ProgrammingError(notstarted)

    # Internal invariant: a started write-mode response has its writer set.
    writefn = self._bodywritefn
    assert writefn
    return offsettrackingwriter(writefn)
551
554
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts; a plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application directly.

    ``app_maker`` is called exactly once, immediately, with no arguments.
    The returned function forwards ``(env, respond)`` to the application it
    produced and returns whatever the application returns.
    """
    # Instantiate eagerly so every request shares the same application.
    wrapped = app_maker()

    def run_wsgi(env, respond):
        return wrapped(env, respond)

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now