##// END OF EJS Templates
hgweb: use our forked wsgiheaders module instead of stdlib one...
Augie Fackler -
r37624:da84e26d default
parent child Browse files
Show More
@@ -1,563 +1,564
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import wsgiref.headers as wsgiheaders
12 #import wsgiref.validate
11 #import wsgiref.validate
13
12
14 from ..thirdparty import (
13 from ..thirdparty import (
15 attr,
14 attr,
16 )
15 )
17 from .. import (
16 from .. import (
18 error,
17 error,
19 pycompat,
18 pycompat,
20 util,
19 util,
21 )
20 )
22
21
23 class multidict(object):
22 class multidict(object):
24 """A dict like object that can store multiple values for a key.
23 """A dict like object that can store multiple values for a key.
25
24
26 Used to store parsed request parameters.
25 Used to store parsed request parameters.
27
26
28 This is inspired by WebOb's class of the same name.
27 This is inspired by WebOb's class of the same name.
29 """
28 """
30 def __init__(self):
29 def __init__(self):
31 self._items = {}
30 self._items = {}
32
31
33 def __getitem__(self, key):
32 def __getitem__(self, key):
34 """Returns the last set value for a key."""
33 """Returns the last set value for a key."""
35 return self._items[key][-1]
34 return self._items[key][-1]
36
35
37 def __setitem__(self, key, value):
36 def __setitem__(self, key, value):
38 """Replace all values for a key with a new value."""
37 """Replace all values for a key with a new value."""
39 self._items[key] = [value]
38 self._items[key] = [value]
40
39
41 def __delitem__(self, key):
40 def __delitem__(self, key):
42 """Delete all values for a key."""
41 """Delete all values for a key."""
43 del self._items[key]
42 del self._items[key]
44
43
45 def __contains__(self, key):
44 def __contains__(self, key):
46 return key in self._items
45 return key in self._items
47
46
48 def __len__(self):
47 def __len__(self):
49 return len(self._items)
48 return len(self._items)
50
49
51 def get(self, key, default=None):
50 def get(self, key, default=None):
52 try:
51 try:
53 return self.__getitem__(key)
52 return self.__getitem__(key)
54 except KeyError:
53 except KeyError:
55 return default
54 return default
56
55
57 def add(self, key, value):
56 def add(self, key, value):
58 """Add a new value for a key. Does not replace existing values."""
57 """Add a new value for a key. Does not replace existing values."""
59 self._items.setdefault(key, []).append(value)
58 self._items.setdefault(key, []).append(value)
60
59
61 def getall(self, key):
60 def getall(self, key):
62 """Obtains all values for a key."""
61 """Obtains all values for a key."""
63 return self._items.get(key, [])
62 return self._items.get(key, [])
64
63
65 def getone(self, key):
64 def getone(self, key):
66 """Obtain a single value for a key.
65 """Obtain a single value for a key.
67
66
68 Raises KeyError if key not defined or it has multiple values set.
67 Raises KeyError if key not defined or it has multiple values set.
69 """
68 """
70 vals = self._items[key]
69 vals = self._items[key]
71
70
72 if len(vals) > 1:
71 if len(vals) > 1:
73 raise KeyError('multiple values for %r' % key)
72 raise KeyError('multiple values for %r' % key)
74
73
75 return vals[0]
74 return vals[0]
76
75
77 def asdictoflists(self):
76 def asdictoflists(self):
78 return {k: list(v) for k, v in self._items.iteritems()}
77 return {k: list(v) for k, v in self._items.iteritems()}
79
78
80 @attr.s(frozen=True)
79 @attr.s(frozen=True)
81 class parsedrequest(object):
80 class parsedrequest(object):
82 """Represents a parsed WSGI request.
81 """Represents a parsed WSGI request.
83
82
84 Contains both parsed parameters as well as a handle on the input stream.
83 Contains both parsed parameters as well as a handle on the input stream.
85 """
84 """
86
85
87 # Request method.
86 # Request method.
88 method = attr.ib()
87 method = attr.ib()
89 # Full URL for this request.
88 # Full URL for this request.
90 url = attr.ib()
89 url = attr.ib()
91 # URL without any path components. Just <proto>://<host><port>.
90 # URL without any path components. Just <proto>://<host><port>.
92 baseurl = attr.ib()
91 baseurl = attr.ib()
93 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
92 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
94 # of HTTP: Host header for hostname. This is likely what clients used.
93 # of HTTP: Host header for hostname. This is likely what clients used.
95 advertisedurl = attr.ib()
94 advertisedurl = attr.ib()
96 advertisedbaseurl = attr.ib()
95 advertisedbaseurl = attr.ib()
97 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
96 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
98 urlscheme = attr.ib()
97 urlscheme = attr.ib()
99 # Value of REMOTE_USER, if set, or None.
98 # Value of REMOTE_USER, if set, or None.
100 remoteuser = attr.ib()
99 remoteuser = attr.ib()
101 # Value of REMOTE_HOST, if set, or None.
100 # Value of REMOTE_HOST, if set, or None.
102 remotehost = attr.ib()
101 remotehost = attr.ib()
103 # Relative WSGI application path. If defined, will begin with a
102 # Relative WSGI application path. If defined, will begin with a
104 # ``/``.
103 # ``/``.
105 apppath = attr.ib()
104 apppath = attr.ib()
106 # List of path parts to be used for dispatch.
105 # List of path parts to be used for dispatch.
107 dispatchparts = attr.ib()
106 dispatchparts = attr.ib()
108 # URL path component (no query string) used for dispatch. Can be
107 # URL path component (no query string) used for dispatch. Can be
109 # ``None`` to signal no path component given to the request, an
108 # ``None`` to signal no path component given to the request, an
110 # empty string to signal a request to the application's root URL,
109 # empty string to signal a request to the application's root URL,
111 # or a string not beginning with ``/`` containing the requested
110 # or a string not beginning with ``/`` containing the requested
112 # path under the application.
111 # path under the application.
113 dispatchpath = attr.ib()
112 dispatchpath = attr.ib()
114 # The name of the repository being accessed.
113 # The name of the repository being accessed.
115 reponame = attr.ib()
114 reponame = attr.ib()
116 # Raw query string (part after "?" in URL).
115 # Raw query string (part after "?" in URL).
117 querystring = attr.ib()
116 querystring = attr.ib()
118 # multidict of query string parameters.
117 # multidict of query string parameters.
119 qsparams = attr.ib()
118 qsparams = attr.ib()
120 # wsgiref.headers.Headers instance. Operates like a dict with case
119 # wsgiref.headers.Headers instance. Operates like a dict with case
121 # insensitive keys.
120 # insensitive keys.
122 headers = attr.ib()
121 headers = attr.ib()
123 # Request body input stream.
122 # Request body input stream.
124 bodyfh = attr.ib()
123 bodyfh = attr.ib()
125 # WSGI environment dict, unmodified.
124 # WSGI environment dict, unmodified.
126 rawenv = attr.ib()
125 rawenv = attr.ib()
127
126
128 def parserequestfromenv(env, reponame=None, altbaseurl=None):
127 def parserequestfromenv(env, reponame=None, altbaseurl=None):
129 """Parse URL components from environment variables.
128 """Parse URL components from environment variables.
130
129
131 WSGI defines request attributes via environment variables. This function
130 WSGI defines request attributes via environment variables. This function
132 parses the environment variables into a data structure.
131 parses the environment variables into a data structure.
133
132
134 If ``reponame`` is defined, the leading path components matching that
133 If ``reponame`` is defined, the leading path components matching that
135 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
134 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
136 This simulates the world view of a WSGI application that processes
135 This simulates the world view of a WSGI application that processes
137 requests from the base URL of a repo.
136 requests from the base URL of a repo.
138
137
139 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
138 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
140 is defined, it is used - instead of the WSGI environment variables - for
139 is defined, it is used - instead of the WSGI environment variables - for
141 constructing URL components up to and including the WSGI application path.
140 constructing URL components up to and including the WSGI application path.
142 For example, if the current WSGI application is at ``/repo`` and a request
141 For example, if the current WSGI application is at ``/repo`` and a request
143 is made to ``/rev/@`` with this argument set to
142 is made to ``/rev/@`` with this argument set to
144 ``http://myserver:9000/prefix``, the URL and path components will resolve as
143 ``http://myserver:9000/prefix``, the URL and path components will resolve as
145 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
144 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
146 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
145 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
147 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.
146 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.
148 """
147 """
149 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
148 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
150
149
151 # We first validate that the incoming object conforms with the WSGI spec.
150 # We first validate that the incoming object conforms with the WSGI spec.
152 # We only want to be dealing with spec-conforming WSGI implementations.
151 # We only want to be dealing with spec-conforming WSGI implementations.
153 # TODO enable this once we fix internal violations.
152 # TODO enable this once we fix internal violations.
154 #wsgiref.validate.check_environ(env)
153 #wsgiref.validate.check_environ(env)
155
154
156 # PEP 3333 states that environment keys and values are native strings
155 # PEP 3333 states that environment keys and values are native strings
157 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
156 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
158 # strings on Python 3 must be between \u0000-\u00FF. We deal with bytes
157 # strings on Python 3 must be between \u0000-\u00FF. We deal with bytes
159 # in Mercurial, so mass convert string keys and values to bytes.
158 # in Mercurial, so mass convert string keys and values to bytes.
160 if pycompat.ispy3:
159 if pycompat.ispy3:
161 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
160 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
162 env = {k: v.encode('latin-1') if isinstance(v, str) else v
161 env = {k: v.encode('latin-1') if isinstance(v, str) else v
163 for k, v in env.iteritems()}
162 for k, v in env.iteritems()}
164
163
165 if altbaseurl:
164 if altbaseurl:
166 altbaseurl = util.url(altbaseurl)
165 altbaseurl = util.url(altbaseurl)
167
166
168 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
167 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
169 # the environment variables.
168 # the environment variables.
170 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
169 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
171 # how URLs are reconstructed.
170 # how URLs are reconstructed.
172 fullurl = env['wsgi.url_scheme'] + '://'
171 fullurl = env['wsgi.url_scheme'] + '://'
173
172
174 if altbaseurl and altbaseurl.scheme:
173 if altbaseurl and altbaseurl.scheme:
175 advertisedfullurl = altbaseurl.scheme + '://'
174 advertisedfullurl = altbaseurl.scheme + '://'
176 else:
175 else:
177 advertisedfullurl = fullurl
176 advertisedfullurl = fullurl
178
177
179 def addport(s, port):
178 def addport(s, port):
180 if s.startswith('https://'):
179 if s.startswith('https://'):
181 if port != '443':
180 if port != '443':
182 s += ':' + port
181 s += ':' + port
183 else:
182 else:
184 if port != '80':
183 if port != '80':
185 s += ':' + port
184 s += ':' + port
186
185
187 return s
186 return s
188
187
189 if env.get('HTTP_HOST'):
188 if env.get('HTTP_HOST'):
190 fullurl += env['HTTP_HOST']
189 fullurl += env['HTTP_HOST']
191 else:
190 else:
192 fullurl += env['SERVER_NAME']
191 fullurl += env['SERVER_NAME']
193 fullurl = addport(fullurl, env['SERVER_PORT'])
192 fullurl = addport(fullurl, env['SERVER_PORT'])
194
193
195 if altbaseurl and altbaseurl.host:
194 if altbaseurl and altbaseurl.host:
196 advertisedfullurl += altbaseurl.host
195 advertisedfullurl += altbaseurl.host
197
196
198 if altbaseurl.port:
197 if altbaseurl.port:
199 port = altbaseurl.port
198 port = altbaseurl.port
200 elif altbaseurl.scheme == 'http' and not altbaseurl.port:
199 elif altbaseurl.scheme == 'http' and not altbaseurl.port:
201 port = '80'
200 port = '80'
202 elif altbaseurl.scheme == 'https' and not altbaseurl.port:
201 elif altbaseurl.scheme == 'https' and not altbaseurl.port:
203 port = '443'
202 port = '443'
204 else:
203 else:
205 port = env['SERVER_PORT']
204 port = env['SERVER_PORT']
206
205
207 advertisedfullurl = addport(advertisedfullurl, port)
206 advertisedfullurl = addport(advertisedfullurl, port)
208 else:
207 else:
209 advertisedfullurl += env['SERVER_NAME']
208 advertisedfullurl += env['SERVER_NAME']
210 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
209 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
211
210
212 baseurl = fullurl
211 baseurl = fullurl
213 advertisedbaseurl = advertisedfullurl
212 advertisedbaseurl = advertisedfullurl
214
213
215 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
214 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
216 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
215 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
217
216
218 if altbaseurl:
217 if altbaseurl:
219 path = altbaseurl.path or ''
218 path = altbaseurl.path or ''
220 if path and not path.startswith('/'):
219 if path and not path.startswith('/'):
221 path = '/' + path
220 path = '/' + path
222 advertisedfullurl += util.urlreq.quote(path)
221 advertisedfullurl += util.urlreq.quote(path)
223 else:
222 else:
224 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
223 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
225
224
226 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
225 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
227
226
228 if env.get('QUERY_STRING'):
227 if env.get('QUERY_STRING'):
229 fullurl += '?' + env['QUERY_STRING']
228 fullurl += '?' + env['QUERY_STRING']
230 advertisedfullurl += '?' + env['QUERY_STRING']
229 advertisedfullurl += '?' + env['QUERY_STRING']
231
230
232 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
231 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
233 # that represents the repository being dispatched to. When computing
232 # that represents the repository being dispatched to. When computing
234 # the dispatch info, we ignore these leading path components.
233 # the dispatch info, we ignore these leading path components.
235
234
236 if altbaseurl:
235 if altbaseurl:
237 apppath = altbaseurl.path or ''
236 apppath = altbaseurl.path or ''
238 if apppath and not apppath.startswith('/'):
237 if apppath and not apppath.startswith('/'):
239 apppath = '/' + apppath
238 apppath = '/' + apppath
240 else:
239 else:
241 apppath = env.get('SCRIPT_NAME', '')
240 apppath = env.get('SCRIPT_NAME', '')
242
241
243 if reponame:
242 if reponame:
244 repoprefix = '/' + reponame.strip('/')
243 repoprefix = '/' + reponame.strip('/')
245
244
246 if not env.get('PATH_INFO'):
245 if not env.get('PATH_INFO'):
247 raise error.ProgrammingError('reponame requires PATH_INFO')
246 raise error.ProgrammingError('reponame requires PATH_INFO')
248
247
249 if not env['PATH_INFO'].startswith(repoprefix):
248 if not env['PATH_INFO'].startswith(repoprefix):
250 raise error.ProgrammingError('PATH_INFO does not begin with repo '
249 raise error.ProgrammingError('PATH_INFO does not begin with repo '
251 'name: %s (%s)' % (env['PATH_INFO'],
250 'name: %s (%s)' % (env['PATH_INFO'],
252 reponame))
251 reponame))
253
252
254 dispatchpath = env['PATH_INFO'][len(repoprefix):]
253 dispatchpath = env['PATH_INFO'][len(repoprefix):]
255
254
256 if dispatchpath and not dispatchpath.startswith('/'):
255 if dispatchpath and not dispatchpath.startswith('/'):
257 raise error.ProgrammingError('reponame prefix of PATH_INFO does '
256 raise error.ProgrammingError('reponame prefix of PATH_INFO does '
258 'not end at path delimiter: %s (%s)' %
257 'not end at path delimiter: %s (%s)' %
259 (env['PATH_INFO'], reponame))
258 (env['PATH_INFO'], reponame))
260
259
261 apppath = apppath.rstrip('/') + repoprefix
260 apppath = apppath.rstrip('/') + repoprefix
262 dispatchparts = dispatchpath.strip('/').split('/')
261 dispatchparts = dispatchpath.strip('/').split('/')
263 dispatchpath = '/'.join(dispatchparts)
262 dispatchpath = '/'.join(dispatchparts)
264
263
265 elif 'PATH_INFO' in env:
264 elif 'PATH_INFO' in env:
266 if env['PATH_INFO'].strip('/'):
265 if env['PATH_INFO'].strip('/'):
267 dispatchparts = env['PATH_INFO'].strip('/').split('/')
266 dispatchparts = env['PATH_INFO'].strip('/').split('/')
268 dispatchpath = '/'.join(dispatchparts)
267 dispatchpath = '/'.join(dispatchparts)
269 else:
268 else:
270 dispatchparts = []
269 dispatchparts = []
271 dispatchpath = ''
270 dispatchpath = ''
272 else:
271 else:
273 dispatchparts = []
272 dispatchparts = []
274 dispatchpath = None
273 dispatchpath = None
275
274
276 querystring = env.get('QUERY_STRING', '')
275 querystring = env.get('QUERY_STRING', '')
277
276
278 # We store as a list so we have ordering information. We also store as
277 # We store as a list so we have ordering information. We also store as
279 # a dict to facilitate fast lookup.
278 # a dict to facilitate fast lookup.
280 qsparams = multidict()
279 qsparams = multidict()
281 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
280 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
282 qsparams.add(k, v)
281 qsparams.add(k, v)
283
282
284 # HTTP_* keys contain HTTP request headers. The Headers structure should
283 # HTTP_* keys contain HTTP request headers. The Headers structure should
285 # perform case normalization for us. We just rewrite underscore to dash
284 # perform case normalization for us. We just rewrite underscore to dash
286 # so keys match what likely went over the wire.
285 # so keys match what likely went over the wire.
287 headers = []
286 headers = []
288 for k, v in env.iteritems():
287 for k, v in env.iteritems():
289 if k.startswith('HTTP_'):
288 if k.startswith('HTTP_'):
290 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
289 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
291
290
291 from . import wsgiheaders # avoid cycle
292 headers = wsgiheaders.Headers(headers)
292 headers = wsgiheaders.Headers(headers)
293
293
294 # This is kind of a lie because the HTTP header wasn't explicitly
294 # This is kind of a lie because the HTTP header wasn't explicitly
295 # sent. But for all intents and purposes it should be OK to lie about
295 # sent. But for all intents and purposes it should be OK to lie about
296 # this, since a consumer will use either value to determine how many
296 # this, since a consumer will use either value to determine how many
297 # bytes are available to read.
297 # bytes are available to read.
298 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
298 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
299 headers['Content-Length'] = env['CONTENT_LENGTH']
299 headers['Content-Length'] = env['CONTENT_LENGTH']
300
300
301 if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
301 if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
302 headers['Content-Type'] = env['CONTENT_TYPE']
302 headers['Content-Type'] = env['CONTENT_TYPE']
303
303
304 bodyfh = env['wsgi.input']
304 bodyfh = env['wsgi.input']
305 if 'Content-Length' in headers:
305 if 'Content-Length' in headers:
306 bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
306 bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
307
307
308 return parsedrequest(method=env['REQUEST_METHOD'],
308 return parsedrequest(method=env['REQUEST_METHOD'],
309 url=fullurl, baseurl=baseurl,
309 url=fullurl, baseurl=baseurl,
310 advertisedurl=advertisedfullurl,
310 advertisedurl=advertisedfullurl,
311 advertisedbaseurl=advertisedbaseurl,
311 advertisedbaseurl=advertisedbaseurl,
312 urlscheme=env['wsgi.url_scheme'],
312 urlscheme=env['wsgi.url_scheme'],
313 remoteuser=env.get('REMOTE_USER'),
313 remoteuser=env.get('REMOTE_USER'),
314 remotehost=env.get('REMOTE_HOST'),
314 remotehost=env.get('REMOTE_HOST'),
315 apppath=apppath,
315 apppath=apppath,
316 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
316 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
317 reponame=reponame,
317 reponame=reponame,
318 querystring=querystring,
318 querystring=querystring,
319 qsparams=qsparams,
319 qsparams=qsparams,
320 headers=headers,
320 headers=headers,
321 bodyfh=bodyfh,
321 bodyfh=bodyfh,
322 rawenv=env)
322 rawenv=env)
323
323
324 class offsettrackingwriter(object):
324 class offsettrackingwriter(object):
325 """A file object like object that is append only and tracks write count.
325 """A file object like object that is append only and tracks write count.
326
326
327 Instances are bound to a callable. This callable is called with data
327 Instances are bound to a callable. This callable is called with data
328 whenever a ``write()`` is attempted.
328 whenever a ``write()`` is attempted.
329
329
330 Instances track the amount of written data so they can answer ``tell()``
330 Instances track the amount of written data so they can answer ``tell()``
331 requests.
331 requests.
332
332
333 The intent of this class is to wrap the ``write()`` function returned by
333 The intent of this class is to wrap the ``write()`` function returned by
334 a WSGI ``start_response()`` function. Since ``write()`` is a callable and
334 a WSGI ``start_response()`` function. Since ``write()`` is a callable and
335 not a file object, it doesn't implement other file object methods.
335 not a file object, it doesn't implement other file object methods.
336 """
336 """
337 def __init__(self, writefn):
337 def __init__(self, writefn):
338 self._write = writefn
338 self._write = writefn
339 self._offset = 0
339 self._offset = 0
340
340
341 def write(self, s):
341 def write(self, s):
342 res = self._write(s)
342 res = self._write(s)
343 # Some Python objects don't report the number of bytes written.
343 # Some Python objects don't report the number of bytes written.
344 if res is None:
344 if res is None:
345 self._offset += len(s)
345 self._offset += len(s)
346 else:
346 else:
347 self._offset += res
347 self._offset += res
348
348
349 def flush(self):
349 def flush(self):
350 pass
350 pass
351
351
352 def tell(self):
352 def tell(self):
353 return self._offset
353 return self._offset
354
354
355 class wsgiresponse(object):
355 class wsgiresponse(object):
356 """Represents a response to a WSGI request.
356 """Represents a response to a WSGI request.
357
357
358 A response consists of a status line, headers, and a body.
358 A response consists of a status line, headers, and a body.
359
359
360 Consumers must populate the ``status`` and ``headers`` fields and
360 Consumers must populate the ``status`` and ``headers`` fields and
361 make a call to a ``setbody*()`` method before the response can be
361 make a call to a ``setbody*()`` method before the response can be
362 issued.
362 issued.
363
363
364 When it is time to start sending the response over the wire,
364 When it is time to start sending the response over the wire,
365 ``sendresponse()`` is called. It handles emitting the header portion
365 ``sendresponse()`` is called. It handles emitting the header portion
366 of the response message. It then yields chunks of body data to be
366 of the response message. It then yields chunks of body data to be
367 written to the peer. Typically, the WSGI application itself calls
367 written to the peer. Typically, the WSGI application itself calls
368 and returns the value from ``sendresponse()``.
368 and returns the value from ``sendresponse()``.
369 """
369 """
370
370
371 def __init__(self, req, startresponse):
371 def __init__(self, req, startresponse):
372 """Create an empty response tied to a specific request.
372 """Create an empty response tied to a specific request.
373
373
374 ``req`` is a ``parsedrequest``. ``startresponse`` is the
374 ``req`` is a ``parsedrequest``. ``startresponse`` is the
375 ``start_response`` function passed to the WSGI application.
375 ``start_response`` function passed to the WSGI application.
376 """
376 """
377 self._req = req
377 self._req = req
378 self._startresponse = startresponse
378 self._startresponse = startresponse
379
379
380 self.status = None
380 self.status = None
381 from . import wsgiheaders # avoid cycle
381 self.headers = wsgiheaders.Headers([])
382 self.headers = wsgiheaders.Headers([])
382
383
383 self._bodybytes = None
384 self._bodybytes = None
384 self._bodygen = None
385 self._bodygen = None
385 self._bodywillwrite = False
386 self._bodywillwrite = False
386 self._started = False
387 self._started = False
387 self._bodywritefn = None
388 self._bodywritefn = None
388
389
389 def _verifybody(self):
390 def _verifybody(self):
390 if (self._bodybytes is not None or self._bodygen is not None
391 if (self._bodybytes is not None or self._bodygen is not None
391 or self._bodywillwrite):
392 or self._bodywillwrite):
392 raise error.ProgrammingError('cannot define body multiple times')
393 raise error.ProgrammingError('cannot define body multiple times')
393
394
394 def setbodybytes(self, b):
395 def setbodybytes(self, b):
395 """Define the response body as static bytes.
396 """Define the response body as static bytes.
396
397
397 The empty string signals that there is no response body.
398 The empty string signals that there is no response body.
398 """
399 """
399 self._verifybody()
400 self._verifybody()
400 self._bodybytes = b
401 self._bodybytes = b
401 self.headers['Content-Length'] = '%d' % len(b)
402 self.headers['Content-Length'] = '%d' % len(b)
402
403
403 def setbodygen(self, gen):
404 def setbodygen(self, gen):
404 """Define the response body as a generator of bytes."""
405 """Define the response body as a generator of bytes."""
405 self._verifybody()
406 self._verifybody()
406 self._bodygen = gen
407 self._bodygen = gen
407
408
408 def setbodywillwrite(self):
409 def setbodywillwrite(self):
409 """Signal an intent to use write() to emit the response body.
410 """Signal an intent to use write() to emit the response body.
410
411
411 **This is the least preferred way to send a body.**
412 **This is the least preferred way to send a body.**
412
413
413 It is preferred for WSGI applications to emit a generator of chunks
414 It is preferred for WSGI applications to emit a generator of chunks
414 constituting the response body. However, some consumers can't emit
415 constituting the response body. However, some consumers can't emit
415 data this way. So, WSGI provides a way to obtain a ``write(data)``
416 data this way. So, WSGI provides a way to obtain a ``write(data)``
416 function that can be used to synchronously perform an unbuffered
417 function that can be used to synchronously perform an unbuffered
417 write.
418 write.
418
419
419 Calling this function signals an intent to produce the body in this
420 Calling this function signals an intent to produce the body in this
420 manner.
421 manner.
421 """
422 """
422 self._verifybody()
423 self._verifybody()
423 self._bodywillwrite = True
424 self._bodywillwrite = True
424
425
425 def sendresponse(self):
426 def sendresponse(self):
426 """Send the generated response to the client.
427 """Send the generated response to the client.
427
428
428 Before this is called, ``status`` must be set and one of
429 Before this is called, ``status`` must be set and one of
429 ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must be called.
430 ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must be called.
430
431
431 Calling this method multiple times is not allowed.
432 Calling this method multiple times is not allowed.
432 """
433 """
433 if self._started:
434 if self._started:
434 raise error.ProgrammingError('sendresponse() called multiple times')
435 raise error.ProgrammingError('sendresponse() called multiple times')
435
436
436 self._started = True
437 self._started = True
437
438
438 if not self.status:
439 if not self.status:
439 raise error.ProgrammingError('status line not defined')
440 raise error.ProgrammingError('status line not defined')
440
441
441 if (self._bodybytes is None and self._bodygen is None
442 if (self._bodybytes is None and self._bodygen is None
442 and not self._bodywillwrite):
443 and not self._bodywillwrite):
443 raise error.ProgrammingError('response body not defined')
444 raise error.ProgrammingError('response body not defined')
444
445
445 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
446 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
446 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
447 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
447 # and SHOULD NOT generate other headers unless they could be used
448 # and SHOULD NOT generate other headers unless they could be used
448 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
449 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
449 # states that no response body can be issued. Content-Length can
450 # states that no response body can be issued. Content-Length can
450 # be sent. But if it is present, it should be the size of the response
451 # be sent. But if it is present, it should be the size of the response
451 # that wasn't transferred.
452 # that wasn't transferred.
452 if self.status.startswith('304 '):
453 if self.status.startswith('304 '):
453 # setbodybytes('') will set C-L to 0. This doesn't conform with the
454 # setbodybytes('') will set C-L to 0. This doesn't conform with the
454 # spec. So remove it.
455 # spec. So remove it.
455 if self.headers.get('Content-Length') == '0':
456 if self.headers.get('Content-Length') == '0':
456 del self.headers['Content-Length']
457 del self.headers['Content-Length']
457
458
458 # Strictly speaking, this is too strict. But until it causes
459 # Strictly speaking, this is too strict. But until it causes
459 # problems, let's be strict.
460 # problems, let's be strict.
460 badheaders = {k for k in self.headers.keys()
461 badheaders = {k for k in self.headers.keys()
461 if k.lower() not in ('date', 'etag', 'expires',
462 if k.lower() not in ('date', 'etag', 'expires',
462 'cache-control',
463 'cache-control',
463 'content-location',
464 'content-location',
464 'vary')}
465 'vary')}
465 if badheaders:
466 if badheaders:
466 raise error.ProgrammingError(
467 raise error.ProgrammingError(
467 'illegal header on 304 response: %s' %
468 'illegal header on 304 response: %s' %
468 ', '.join(sorted(badheaders)))
469 ', '.join(sorted(badheaders)))
469
470
470 if self._bodygen is not None or self._bodywillwrite:
471 if self._bodygen is not None or self._bodywillwrite:
471 raise error.ProgrammingError("must use setbodybytes('') with "
472 raise error.ProgrammingError("must use setbodybytes('') with "
472 "304 responses")
473 "304 responses")
473
474
474 # Various HTTP clients (notably httplib) won't read the HTTP response
475 # Various HTTP clients (notably httplib) won't read the HTTP response
475 # until the HTTP request has been sent in full. If servers (us) send a
476 # until the HTTP request has been sent in full. If servers (us) send a
476 # response before the HTTP request has been fully sent, the connection
477 # response before the HTTP request has been fully sent, the connection
477 # may deadlock because neither end is reading.
478 # may deadlock because neither end is reading.
478 #
479 #
479 # We work around this by "draining" the request data before
480 # We work around this by "draining" the request data before
480 # sending any response in some conditions.
481 # sending any response in some conditions.
481 drain = False
482 drain = False
482 close = False
483 close = False
483
484
484 # If the client sent Expect: 100-continue, we assume it is smart enough
485 # If the client sent Expect: 100-continue, we assume it is smart enough
485 # to deal with the server sending a response before reading the request.
486 # to deal with the server sending a response before reading the request.
486 # (httplib doesn't do this.)
487 # (httplib doesn't do this.)
487 if self._req.headers.get('Expect', '').lower() == '100-continue':
488 if self._req.headers.get('Expect', '').lower() == '100-continue':
488 pass
489 pass
489 # Only tend to request methods that have bodies. Strictly speaking,
490 # Only tend to request methods that have bodies. Strictly speaking,
490 # we should sniff for a body. But this is fine for our existing
491 # we should sniff for a body. But this is fine for our existing
491 # WSGI applications.
492 # WSGI applications.
492 elif self._req.method not in ('POST', 'PUT'):
493 elif self._req.method not in ('POST', 'PUT'):
493 pass
494 pass
494 else:
495 else:
495 # If we don't know how much data to read, there's no guarantee
496 # If we don't know how much data to read, there's no guarantee
496 # that we can drain the request responsibly. The WSGI
497 # that we can drain the request responsibly. The WSGI
497 # specification only says that servers *should* ensure the
498 # specification only says that servers *should* ensure the
498 # input stream doesn't overrun the actual request. So there's
499 # input stream doesn't overrun the actual request. So there's
499 # no guarantee that reading until EOF won't corrupt the stream
500 # no guarantee that reading until EOF won't corrupt the stream
500 # state.
501 # state.
501 if not isinstance(self._req.bodyfh, util.cappedreader):
502 if not isinstance(self._req.bodyfh, util.cappedreader):
502 close = True
503 close = True
503 else:
504 else:
504 # We /could/ only drain certain HTTP response codes. But 200 and
505 # We /could/ only drain certain HTTP response codes. But 200 and
505 # non-200 wire protocol responses both require draining. Since
506 # non-200 wire protocol responses both require draining. Since
506 # we have a capped reader in place for all situations where we
507 # we have a capped reader in place for all situations where we
507 # drain, it is safe to read from that stream. We'll either do
508 # drain, it is safe to read from that stream. We'll either do
508 # a drain or no-op if we're already at EOF.
509 # a drain or no-op if we're already at EOF.
509 drain = True
510 drain = True
510
511
511 if close:
512 if close:
512 self.headers['Connection'] = 'Close'
513 self.headers['Connection'] = 'Close'
513
514
514 if drain:
515 if drain:
515 assert isinstance(self._req.bodyfh, util.cappedreader)
516 assert isinstance(self._req.bodyfh, util.cappedreader)
516 while True:
517 while True:
517 chunk = self._req.bodyfh.read(32768)
518 chunk = self._req.bodyfh.read(32768)
518 if not chunk:
519 if not chunk:
519 break
520 break
520
521
521 strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
522 strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
522 k, v in self.headers.items()]
523 k, v in self.headers.items()]
523 write = self._startresponse(pycompat.sysstr(self.status),
524 write = self._startresponse(pycompat.sysstr(self.status),
524 strheaders)
525 strheaders)
525
526
526 if self._bodybytes:
527 if self._bodybytes:
527 yield self._bodybytes
528 yield self._bodybytes
528 elif self._bodygen:
529 elif self._bodygen:
529 for chunk in self._bodygen:
530 for chunk in self._bodygen:
530 yield chunk
531 yield chunk
531 elif self._bodywillwrite:
532 elif self._bodywillwrite:
532 self._bodywritefn = write
533 self._bodywritefn = write
533 else:
534 else:
534 error.ProgrammingError('do not know how to send body')
535 error.ProgrammingError('do not know how to send body')
535
536
def getbodyfile(self):
    """Return a file-like writer used to emit the response body.

    Two preconditions apply: ``setbodywillwrite()`` must have been
    called, and ``sendresponse()`` must have been called *and consumed*.
    ``sendresponse()`` is a generator, so none of its code runs until it
    is advanced. In this mode it yields no items, so the simplest way to
    drive it to completion is ``list(res.sendresponse())``, which
    evaluates to ``[]``.

    Raises ``error.ProgrammingError`` when either precondition is unmet.
    """
    if self._bodywillwrite and self._started:
        # sendresponse() records the write callable it obtained from
        # the start-response call when the body is going to be written
        # directly, so it is expected to be set at this point.
        assert self._bodywritefn
        return offsettrackingwriter(self._bodywritefn)

    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    # Body-will-write mode was requested but the response never started.
    raise error.ProgrammingError('must call sendresponse() first; did '
                                 'you remember to consume it since it '
                                 'is a generator?')
556
557
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts; a plain hgweb() or
    hgwebdir() instance can and should now be used directly as a WSGI
    application.

    ``app_maker`` is called exactly once, immediately. The returned
    function forwards ``(env, respond)`` to the application it produced
    and returns whatever that application returns.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        # Thin pass-through to the application created above.
        return wsgiapp(env, respond)

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now