##// END OF EJS Templates
hgweb: guard against empty Content-Length header...
Gregory Szorc -
r37843:e82b137a @21 stable
parent child Browse files
Show More
@@ -1,574 +1,575 b''
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 #import wsgiref.validate
11 #import wsgiref.validate
12
12
13 from ..thirdparty import (
13 from ..thirdparty import (
14 attr,
14 attr,
15 )
15 )
16 from .. import (
16 from .. import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.
    """
    def __init__(self):
        # Maps each key to the list of values recorded for it, in the
        # order they were added.
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key is not defined.
        """
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key.

        Raises KeyError if the key is not defined.
        """
        del self._items[key]

    def __contains__(self, key):
        return key in self._items

    def __len__(self):
        """Returns the number of distinct keys (not the number of values)."""
        return len(self._items)

    def get(self, key, default=None):
        """Returns the last set value for a key, or ``default`` if unset."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key.

        Returns an empty list if the key is not defined. For a defined key
        the returned list is the internal one, not a copy, so callers
        should not mutate it.
        """
        return self._items.get(key, [])

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            raise KeyError('multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Returns a new dict mapping each key to a copy of its value list."""
        # ``items()`` exists on both Python 2 and Python 3 dicts, whereas
        # ``iteritems()`` is Python 2 only.
        return {k: list(v) for k, v in self._items.items()}
78
78
@attr.s(frozen=True)
class parsedrequest(object):
    """Record describing a single parsed WSGI request.

    Carries the parsed URL, query string and header data, plus a handle on
    the stream the request body is read from. Declared as a frozen attrs
    class.
    """

    # HTTP request method.
    method = attr.ib()
    # Complete URL of the request.
    url = attr.ib()
    # ``url`` stripped of every path component: <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised counterparts of ``url`` and ``baseurl``. They are built
    # from SERVER_NAME rather than the HTTP Host header, which is likely
    # what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # Scheme portion of the URL (text before ``://``), e.g. ``http`` or
    # ``https``.
    urlscheme = attr.ib()
    # REMOTE_USER value, or None when it is not set.
    remoteuser = attr.ib()
    # REMOTE_HOST value, or None when it is not set.
    remotehost = attr.ib()
    # Relative path of the WSGI application. Begins with ``/`` when
    # defined.
    apppath = attr.ib()
    # Path components (as a list) used for dispatch.
    dispatchparts = attr.ib()
    # Path portion of the URL (query string excluded) used for dispatch.
    # ``None`` means the request carried no path component, the empty
    # string means the application's root URL was requested, and any other
    # value is the requested path below the application, without a leading
    # ``/``.
    dispatchpath = attr.ib()
    # Name of the repository this request is accessing.
    reponame = attr.ib()
    # Unparsed query string (the part after "?" in the URL).
    querystring = attr.ib()
    # Query string parameters, as a multidict.
    qsparams = attr.ib()
    # A wsgiref.headers.Headers instance; behaves like a dict whose keys
    # are case insensitive.
    headers = attr.ib()
    # Input stream for the request body.
    bodyfh = attr.ib()
    # The WSGI environment dict, unmodified.
    rawenv = attr.ib()
126
126
def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    ``bodyfh`` can be used to specify a file object to read the request body
    from. If not defined, ``wsgi.input`` from the environment dict is used.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` when a repo name is in effect and
    ``PATH_INFO`` is missing or empty, does not begin with the repo name, or
    continues past the repo name without a path delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    # ``items()`` is used because ``iteritems()`` only exists on Python 2
    # dicts.
    if pycompat.ispy3:
        env = {k.encode('latin-1'):
               v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    # Some hosting solutions are emulating hgwebdir, and dispatching directly
    # to an hgweb instance using this environment variable. This was always
    # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
    if not reponame:
        reponame = env.get('REPO_NAME')

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    urlscheme = env['wsgi.url_scheme']
    fullurl = urlscheme + '://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + '://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Append ``:<port>`` unless it is the default port for the scheme
        # that ``s`` starts with (443 for https, 80 for anything else).
        defaultport = '443' if s.startswith('https://') else '80'
        if port != defaultport:
            s += ':' + port

        return s

    # The Host header, when present, is used as is; no port is appended.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl, env['SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # An explicit port wins. Otherwise fall back to the default port of
        # the alternate URL's scheme and, failing that, to SERVER_PORT.
        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == 'http':
            port = '80'
        elif altbaseurl.scheme == 'https':
            port = '443'
        else:
            port = env['SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env['SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    # WSGI application path: taken from ``altbaseurl`` when one is given
    # (forced to begin with ``/`` if non-empty), else from SCRIPT_NAME.
    if altbaseurl:
        apppath = altbaseurl.path or ''
        if apppath and not apppath.startswith('/'):
            apppath = '/' + apppath
    else:
        apppath = env.get('SCRIPT_NAME', '')

    quotedpathinfo = util.urlreq.quote(env.get('PATH_INFO', ''))

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += quotedpathinfo

    advertisedfullurl += util.urlreq.quote(apppath)
    advertisedfullurl += quotedpathinfo

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.
    if reponame:
        repoprefix = '/' + reponame.strip('/')

        if not env.get('PATH_INFO'):
            raise error.ProgrammingError('reponame requires PATH_INFO')

        if not env['PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                         'name: %s (%s)' % (env['PATH_INFO'],
                                                            reponame))

        dispatchpath = env['PATH_INFO'][len(repoprefix):]

        if dispatchpath and not dispatchpath.startswith('/'):
            raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                         'not end at path delimiter: %s (%s)' %
                                         (env['PATH_INFO'], reponame))

        apppath = apppath.rstrip('/') + repoprefix
        # NOTE(review): when PATH_INFO is exactly the repo prefix this yields
        # dispatchparts == [''] rather than [] as in the branch below. Left
        # unchanged; confirm whether any caller relies on it.
        dispatchparts = dispatchpath.strip('/').split('/')
        dispatchpath = '/'.join(dispatchparts)

    elif 'PATH_INFO' in env:
        if env['PATH_INFO'].strip('/'):
            dispatchparts = env['PATH_INFO'].strip('/').split('/')
            dispatchpath = '/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = ''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get('QUERY_STRING', '')

    # The multidict keeps the values of each key in the order they appeared
    # in the query string while still offering dict style lookup by key.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = [(k[len('HTTP_'):].replace('_', '-'), v)
               for k, v in env.items() if k.startswith('HTTP_')]

    from . import wsgiheaders # avoid cycle
    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
        headers['Content-Type'] = env['CONTENT_TYPE']

    if bodyfh is None:
        bodyfh = env['wsgi.input']
        if 'Content-Length' in headers:
            # Guard against an empty Content-Length header by treating it
            # as a length of 0.
            bodyfh = util.cappedreader(bodyfh,
                                       int(headers['Content-Length'] or '0'))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=urlscheme,
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh,
                         rawenv=env)
333
334
class offsettrackingwriter(object):
    """Append-only, file-like wrapper that counts what has been written.

    Each instance is bound to a callable, which receives the data of every
    ``write()`` call. The running total of written data is kept so that
    ``tell()`` can be answered.

    This exists to wrap the ``write()`` function handed back by a WSGI
    ``start_response()`` function. That ``write()`` is a bare callable
    rather than a file object, so it offers no other file object methods.
    """
    def __init__(self, writefn):
        self._offset = 0
        self._write = writefn

    def write(self, s):
        """Pass ``s`` to the bound callable and advance the offset."""
        written = self._write(s)
        # Not every callable reports how much it wrote; when it returns
        # None, count the full length of ``s`` instead.
        self._offset += len(s) if written is None else written

    def flush(self):
        """Do nothing; present for file object compatibility."""
        pass

    def tell(self):
        """Return the amount of data written so far."""
        return self._offset
364
365
365 class wsgiresponse(object):
366 class wsgiresponse(object):
366 """Represents a response to a WSGI request.
367 """Represents a response to a WSGI request.
367
368
368 A response consists of a status line, headers, and a body.
369 A response consists of a status line, headers, and a body.
369
370
370 Consumers must populate the ``status`` and ``headers`` fields and
371 Consumers must populate the ``status`` and ``headers`` fields and
371 make a call to a ``setbody*()`` method before the response can be
372 make a call to a ``setbody*()`` method before the response can be
372 issued.
373 issued.
373
374
374 When it is time to start sending the response over the wire,
375 When it is time to start sending the response over the wire,
375 ``sendresponse()`` is called. It handles emitting the header portion
376 ``sendresponse()`` is called. It handles emitting the header portion
376 of the response message. It then yields chunks of body data to be
377 of the response message. It then yields chunks of body data to be
377 written to the peer. Typically, the WSGI application itself calls
378 written to the peer. Typically, the WSGI application itself calls
378 and returns the value from ``sendresponse()``.
379 and returns the value from ``sendresponse()``.
379 """
380 """
380
381
381 def __init__(self, req, startresponse):
382 def __init__(self, req, startresponse):
382 """Create an empty response tied to a specific request.
383 """Create an empty response tied to a specific request.
383
384
384 ``req`` is a ``parsedrequest``. ``startresponse`` is the
385 ``req`` is a ``parsedrequest``. ``startresponse`` is the
385 ``start_response`` function passed to the WSGI application.
386 ``start_response`` function passed to the WSGI application.
386 """
387 """
387 self._req = req
388 self._req = req
388 self._startresponse = startresponse
389 self._startresponse = startresponse
389
390
390 self.status = None
391 self.status = None
391 from . import wsgiheaders # avoid cycle
392 from . import wsgiheaders # avoid cycle
392 self.headers = wsgiheaders.Headers([])
393 self.headers = wsgiheaders.Headers([])
393
394
394 self._bodybytes = None
395 self._bodybytes = None
395 self._bodygen = None
396 self._bodygen = None
396 self._bodywillwrite = False
397 self._bodywillwrite = False
397 self._started = False
398 self._started = False
398 self._bodywritefn = None
399 self._bodywritefn = None
399
400
400 def _verifybody(self):
401 def _verifybody(self):
401 if (self._bodybytes is not None or self._bodygen is not None
402 if (self._bodybytes is not None or self._bodygen is not None
402 or self._bodywillwrite):
403 or self._bodywillwrite):
403 raise error.ProgrammingError('cannot define body multiple times')
404 raise error.ProgrammingError('cannot define body multiple times')
404
405
405 def setbodybytes(self, b):
406 def setbodybytes(self, b):
406 """Define the response body as static bytes.
407 """Define the response body as static bytes.
407
408
408 The empty string signals that there is no response body.
409 The empty string signals that there is no response body.
409 """
410 """
410 self._verifybody()
411 self._verifybody()
411 self._bodybytes = b
412 self._bodybytes = b
412 self.headers['Content-Length'] = '%d' % len(b)
413 self.headers['Content-Length'] = '%d' % len(b)
413
414
414 def setbodygen(self, gen):
415 def setbodygen(self, gen):
415 """Define the response body as a generator of bytes."""
416 """Define the response body as a generator of bytes."""
416 self._verifybody()
417 self._verifybody()
417 self._bodygen = gen
418 self._bodygen = gen
418
419
419 def setbodywillwrite(self):
420 def setbodywillwrite(self):
420 """Signal an intent to use write() to emit the response body.
421 """Signal an intent to use write() to emit the response body.
421
422
422 **This is the least preferred way to send a body.**
423 **This is the least preferred way to send a body.**
423
424
424 It is preferred for WSGI applications to emit a generator of chunks
425 It is preferred for WSGI applications to emit a generator of chunks
425 constituting the response body. However, some consumers can't emit
426 constituting the response body. However, some consumers can't emit
426 data this way. So, WSGI provides a way to obtain a ``write(data)``
427 data this way. So, WSGI provides a way to obtain a ``write(data)``
427 function that can be used to synchronously perform an unbuffered
428 function that can be used to synchronously perform an unbuffered
428 write.
429 write.
429
430
430 Calling this function signals an intent to produce the body in this
431 Calling this function signals an intent to produce the body in this
431 manner.
432 manner.
432 """
433 """
433 self._verifybody()
434 self._verifybody()
434 self._bodywillwrite = True
435 self._bodywillwrite = True
435
436
436 def sendresponse(self):
def sendresponse(self):
    """Send the generated response to the client.

    Before this is called, ``status`` must be set and a response body
    must have been configured (body bytes, a body generator, or the
    "body will be written" mode used by ``getbodyfile()``).

    This is a generator: nothing below runs until it is iterated. It
    yields the chunks of the response body, if there are any.

    Calling this method multiple times is not allowed.

    Raises ``error.ProgrammingError`` if called more than once, if no
    status or no body was defined, or if a 304 response carries headers
    or a body type that is not allowed for it.
    """
    if self._started:
        raise error.ProgrammingError('sendresponse() called multiple times')

    # Flagged before validation, so a failed attempt still counts as the
    # single allowed call.
    self._started = True

    if not self.status:
        raise error.ProgrammingError('status line not defined')

    if (self._bodybytes is None and self._bodygen is None
        and not self._bodywillwrite):
        raise error.ProgrammingError('response body not defined')

    # RFC 7232 Section 4.1 states that a 304 MUST generate one of
    # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
    # and SHOULD NOT generate other headers unless they could be used
    # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
    # states that no response body can be issued. Content-Length can
    # be sent. But if it is present, it should be the size of the response
    # that wasn't transferred.
    if self.status.startswith('304 '):
        # setbodybytes('') will set C-L to 0. This doesn't conform with the
        # spec. So remove it.
        if self.headers.get('Content-Length') == '0':
            del self.headers['Content-Length']

        # Strictly speaking, this is too strict. But until it causes
        # problems, let's be strict.
        allowedheaders = ('date', 'etag', 'expires', 'cache-control',
                          'content-location', 'vary')
        badheaders = {k for k in self.headers.keys()
                      if k.lower() not in allowedheaders}
        if badheaders:
            raise error.ProgrammingError(
                'illegal header on 304 response: %s' %
                ', '.join(sorted(badheaders)))

        if self._bodygen is not None or self._bodywillwrite:
            raise error.ProgrammingError("must use setbodybytes('') with "
                                         "304 responses")

    # Various HTTP clients (notably httplib) won't read the HTTP response
    # until the HTTP request has been sent in full. If servers (us) send a
    # response before the HTTP request has been fully sent, the connection
    # may deadlock because neither end is reading.
    #
    # We work around this by "draining" the request data before
    # sending any response in some conditions.
    drain = False
    close = False

    # If the client sent Expect: 100-continue, we assume it is smart enough
    # to deal with the server sending a response before reading the request.
    # (httplib doesn't do this.)
    if self._req.headers.get('Expect', '').lower() == '100-continue':
        pass
    # Only tend to request methods that have bodies. Strictly speaking,
    # we should sniff for a body. But this is fine for our existing
    # WSGI applications.
    elif self._req.method not in ('POST', 'PUT'):
        pass
    else:
        # If we don't know how much data to read, there's no guarantee
        # that we can drain the request responsibly. The WSGI
        # specification only says that servers *should* ensure the
        # input stream doesn't overrun the actual request. So there's
        # no guarantee that reading until EOF won't corrupt the stream
        # state.
        if not isinstance(self._req.bodyfh, util.cappedreader):
            close = True
        else:
            # We /could/ only drain certain HTTP response codes. But 200 and
            # non-200 wire protocol responses both require draining. Since
            # we have a capped reader in place for all situations where we
            # drain, it is safe to read from that stream. We'll either do
            # a drain or no-op if we're already at EOF.
            drain = True

    if close:
        self.headers['Connection'] = 'Close'

    if drain:
        assert isinstance(self._req.bodyfh, util.cappedreader)
        # Read and discard whatever is left of the request body.
        while True:
            chunk = self._req.bodyfh.read(32768)
            if not chunk:
                break

    strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
                  k, v in self.headers.items()]
    write = self._startresponse(pycompat.sysstr(self.status),
                                strheaders)

    if self._bodybytes:
        yield self._bodybytes
    elif self._bodygen:
        for chunk in self._bodygen:
            yield chunk
    elif self._bodywillwrite:
        # The body will be produced later through the write callable
        # returned when the response was started.
        self._bodywritefn = write
    elif self._bodybytes is None and self._bodygen is None:
        # No body source is configured at all. The exception used to be
        # constructed here without ``raise`` and was silently discarded.
        raise error.ProgrammingError('do not know how to send body')
    # Otherwise a body was configured but is empty (for example
    # setbodybytes('') on a 304 response): there is nothing to send.
546
547
def getbodyfile(self):
    """Return a file-like writer for the response body.

    This only works after ``setbodywillwrite()`` has been called and
    ``sendresponse()`` has been run. ``sendresponse()`` is a generator,
    so it does nothing until it is advanced. In this mode it yields no
    items; ``list(res.sendresponse())`` is the simplest way to consume
    it and should evaluate to ``[]``.

    Raises ``error.ProgrammingError`` when either precondition is
    not met.
    """
    if self._bodywillwrite and self._started:
        writefn = self._bodywritefn
        assert writefn
        return offsettrackingwriter(writefn)

    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    raise error.ProgrammingError(
        'must call sendresponse() first; did you remember to consume it '
        'since it is a generator?')
567
568
def wsgiapplication(app_maker):
    '''Compatibility shim for old CGI scripts.

    Calls ``app_maker()`` once, immediately, and returns a WSGI callable
    that forwards ``(env, respond)`` to the application it produced. New
    code can and should use a plain hgweb() or hgwebdir() as the WSGI
    application instead.
    '''
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        # Delegate every request to the application built above.
        result = wsgiapp(env, respond)
        return result

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now