##// END OF EJS Templates
hgweb: refactor multirequest to be a dict of lists...
Gregory Szorc -
r37012:44467a4d default
parent child Browse files
Show More
@@ -1,585 +1,558
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import wsgiref.headers as wsgiheaders
11 import wsgiref.headers as wsgiheaders
12 #import wsgiref.validate
12 #import wsgiref.validate
13
13
14 from ..thirdparty import (
14 from ..thirdparty import (
15 attr,
15 attr,
16 )
16 )
17 from .. import (
17 from .. import (
18 error,
18 error,
19 pycompat,
19 pycompat,
20 util,
20 util,
21 )
21 )
22
22
23 class multidict(object):
23 class multidict(object):
24 """A dict like object that can store multiple values for a key.
24 """A dict like object that can store multiple values for a key.
25
25
26 Used to store parsed request parameters.
26 Used to store parsed request parameters.
27
27
28 This is inspired by WebOb's class of the same name.
28 This is inspired by WebOb's class of the same name.
29 """
29 """
30 def __init__(self):
30 def __init__(self):
31 # Stores (key, value) 2-tuples. This isn't the most efficient. But we
31 self._items = {}
32 # don't rely on parameters that much, so it shouldn't be a perf issue.
33 # we can always add dict for fast lookups.
34 self._items = []
35
32
36 def __getitem__(self, key):
33 def __getitem__(self, key):
37 """Returns the last set value for a key."""
34 """Returns the last set value for a key."""
38 for k, v in reversed(self._items):
35 return self._items[key][-1]
39 if k == key:
40 return v
41
42 raise KeyError(key)
43
36
44 def __setitem__(self, key, value):
37 def __setitem__(self, key, value):
45 """Replace all values for a key with a new value."""
38 """Replace all values for a key with a new value."""
46 try:
39 self._items[key] = [value]
47 del self[key]
48 except KeyError:
49 pass
50
51 self._items.append((key, value))
52
40
53 def __delitem__(self, key):
41 def __delitem__(self, key):
54 """Delete all values for a key."""
42 """Delete all values for a key."""
55 oldlen = len(self._items)
43 del self._items[key]
56
57 self._items[:] = [(k, v) for k, v in self._items if k != key]
58
59 if oldlen == len(self._items):
60 raise KeyError(key)
61
44
62 def __contains__(self, key):
45 def __contains__(self, key):
63 return any(k == key for k, v in self._items)
46 return key in self._items
64
47
65 def __len__(self):
48 def __len__(self):
66 return len(self._items)
49 return len(self._items)
67
50
68 def get(self, key, default=None):
51 def get(self, key, default=None):
69 try:
52 try:
70 return self.__getitem__(key)
53 return self.__getitem__(key)
71 except KeyError:
54 except KeyError:
72 return default
55 return default
73
56
74 def add(self, key, value):
57 def add(self, key, value):
75 """Add a new value for a key. Does not replace existing values."""
58 """Add a new value for a key. Does not replace existing values."""
76 self._items.append((key, value))
59 self._items.setdefault(key, []).append(value)
77
60
78 def getall(self, key):
61 def getall(self, key):
79 """Obtains all values for a key."""
62 """Obtains all values for a key."""
80 return [v for k, v in self._items if k == key]
63 return self._items.get(key, [])
81
64
82 def getone(self, key):
65 def getone(self, key):
83 """Obtain a single value for a key.
66 """Obtain a single value for a key.
84
67
85 Raises KeyError if key not defined or it has multiple values set.
68 Raises KeyError if key not defined or it has multiple values set.
86 """
69 """
87 vals = self.getall(key)
70 vals = self._items[key]
88
89 if not vals:
90 raise KeyError(key)
91
71
92 if len(vals) > 1:
72 if len(vals) > 1:
93 raise KeyError('multiple values for %r' % key)
73 raise KeyError('multiple values for %r' % key)
94
74
95 return vals[0]
75 return vals[0]
96
76
97 def asdictoflists(self):
77 def asdictoflists(self):
98 d = {}
78 return {k: list(v) for k, v in self._items.iteritems()}
99 for k, v in self._items:
100 if k in d:
101 d[k].append(v)
102 else:
103 d[k] = [v]
104
105 return d
106
79
107 @attr.s(frozen=True)
80 @attr.s(frozen=True)
108 class parsedrequest(object):
81 class parsedrequest(object):
109 """Represents a parsed WSGI request.
82 """Represents a parsed WSGI request.
110
83
111 Contains both parsed parameters as well as a handle on the input stream.
84 Contains both parsed parameters as well as a handle on the input stream.
112 """
85 """
113
86
114 # Request method.
87 # Request method.
115 method = attr.ib()
88 method = attr.ib()
116 # Full URL for this request.
89 # Full URL for this request.
117 url = attr.ib()
90 url = attr.ib()
118 # URL without any path components. Just <proto>://<host><port>.
91 # URL without any path components. Just <proto>://<host><port>.
119 baseurl = attr.ib()
92 baseurl = attr.ib()
120 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
93 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
121 # of HTTP: Host header for hostname. This is likely what clients used.
94 # of HTTP: Host header for hostname. This is likely what clients used.
122 advertisedurl = attr.ib()
95 advertisedurl = attr.ib()
123 advertisedbaseurl = attr.ib()
96 advertisedbaseurl = attr.ib()
124 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
97 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
125 urlscheme = attr.ib()
98 urlscheme = attr.ib()
126 # Value of REMOTE_USER, if set, or None.
99 # Value of REMOTE_USER, if set, or None.
127 remoteuser = attr.ib()
100 remoteuser = attr.ib()
128 # Value of REMOTE_HOST, if set, or None.
101 # Value of REMOTE_HOST, if set, or None.
129 remotehost = attr.ib()
102 remotehost = attr.ib()
130 # Relative WSGI application path. If defined, will begin with a
103 # Relative WSGI application path. If defined, will begin with a
131 # ``/``.
104 # ``/``.
132 apppath = attr.ib()
105 apppath = attr.ib()
133 # List of path parts to be used for dispatch.
106 # List of path parts to be used for dispatch.
134 dispatchparts = attr.ib()
107 dispatchparts = attr.ib()
135 # URL path component (no query string) used for dispatch. Can be
108 # URL path component (no query string) used for dispatch. Can be
136 # ``None`` to signal no path component given to the request, an
109 # ``None`` to signal no path component given to the request, an
137 # empty string to signal a request to the application's root URL,
110 # empty string to signal a request to the application's root URL,
138 # or a string not beginning with ``/`` containing the requested
111 # or a string not beginning with ``/`` containing the requested
139 # path under the application.
112 # path under the application.
140 dispatchpath = attr.ib()
113 dispatchpath = attr.ib()
141 # The name of the repository being accessed.
114 # The name of the repository being accessed.
142 reponame = attr.ib()
115 reponame = attr.ib()
143 # Raw query string (part after "?" in URL).
116 # Raw query string (part after "?" in URL).
144 querystring = attr.ib()
117 querystring = attr.ib()
145 # multidict of query string parameters.
118 # multidict of query string parameters.
146 qsparams = attr.ib()
119 qsparams = attr.ib()
147 # wsgiref.headers.Headers instance. Operates like a dict with case
120 # wsgiref.headers.Headers instance. Operates like a dict with case
148 # insensitive keys.
121 # insensitive keys.
149 headers = attr.ib()
122 headers = attr.ib()
150 # Request body input stream.
123 # Request body input stream.
151 bodyfh = attr.ib()
124 bodyfh = attr.ib()
152 # WSGI environment dict, unmodified.
125 # WSGI environment dict, unmodified.
153 rawenv = attr.ib()
126 rawenv = attr.ib()
154
127
155 def parserequestfromenv(env, reponame=None, altbaseurl=None):
128 def parserequestfromenv(env, reponame=None, altbaseurl=None):
156 """Parse URL components from environment variables.
129 """Parse URL components from environment variables.
157
130
158 WSGI defines request attributes via environment variables. This function
131 WSGI defines request attributes via environment variables. This function
159 parses the environment variables into a data structure.
132 parses the environment variables into a data structure.
160
133
161 If ``reponame`` is defined, the leading path components matching that
134 If ``reponame`` is defined, the leading path components matching that
162 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
135 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
163 This simulates the world view of a WSGI application that processes
136 This simulates the world view of a WSGI application that processes
164 requests from the base URL of a repo.
137 requests from the base URL of a repo.
165
138
166 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
139 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
167 is defined, it is used - instead of the WSGI environment variables - for
140 is defined, it is used - instead of the WSGI environment variables - for
168 constructing URL components up to and including the WSGI application path.
141 constructing URL components up to and including the WSGI application path.
169 For example, if the current WSGI application is at ``/repo`` and a request
142 For example, if the current WSGI application is at ``/repo`` and a request
170 is made to ``/rev/@`` with this argument set to
143 is made to ``/rev/@`` with this argument set to
171 ``http://myserver:9000/prefix``, the URL and path components will resolve as
144 ``http://myserver:9000/prefix``, the URL and path components will resolve as
172 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
145 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
173 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
146 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
174 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.
147 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.
175 """
148 """
176 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
149 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
177
150
178 # We first validate that the incoming object conforms with the WSGI spec.
151 # We first validate that the incoming object conforms with the WSGI spec.
179 # We only want to be dealing with spec-conforming WSGI implementations.
152 # We only want to be dealing with spec-conforming WSGI implementations.
180 # TODO enable this once we fix internal violations.
153 # TODO enable this once we fix internal violations.
181 #wsgiref.validate.check_environ(env)
154 #wsgiref.validate.check_environ(env)
182
155
183 # PEP-0333 states that environment keys and values are native strings
156 # PEP-0333 states that environment keys and values are native strings
184 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
157 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
185 # strings on Python 3 must be between U+0000 and U+00FF. We deal with bytes
158 # strings on Python 3 must be between U+0000 and U+00FF. We deal with bytes
186 # in Mercurial, so mass convert string keys and values to bytes.
159 # in Mercurial, so mass convert string keys and values to bytes.
187 if pycompat.ispy3:
160 if pycompat.ispy3:
188 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
161 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
189 env = {k: v.encode('latin-1') if isinstance(v, str) else v
162 env = {k: v.encode('latin-1') if isinstance(v, str) else v
190 for k, v in env.iteritems()}
163 for k, v in env.iteritems()}
191
164
192 if altbaseurl:
165 if altbaseurl:
193 altbaseurl = util.url(altbaseurl)
166 altbaseurl = util.url(altbaseurl)
194
167
195 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
168 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
196 # the environment variables.
169 # the environment variables.
197 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
170 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
198 # how URLs are reconstructed.
171 # how URLs are reconstructed.
199 fullurl = env['wsgi.url_scheme'] + '://'
172 fullurl = env['wsgi.url_scheme'] + '://'
200
173
201 if altbaseurl and altbaseurl.scheme:
174 if altbaseurl and altbaseurl.scheme:
202 advertisedfullurl = altbaseurl.scheme + '://'
175 advertisedfullurl = altbaseurl.scheme + '://'
203 else:
176 else:
204 advertisedfullurl = fullurl
177 advertisedfullurl = fullurl
205
178
206 def addport(s, port):
179 def addport(s, port):
207 if s.startswith('https://'):
180 if s.startswith('https://'):
208 if port != '443':
181 if port != '443':
209 s += ':' + port
182 s += ':' + port
210 else:
183 else:
211 if port != '80':
184 if port != '80':
212 s += ':' + port
185 s += ':' + port
213
186
214 return s
187 return s
215
188
216 if env.get('HTTP_HOST'):
189 if env.get('HTTP_HOST'):
217 fullurl += env['HTTP_HOST']
190 fullurl += env['HTTP_HOST']
218 else:
191 else:
219 fullurl += env['SERVER_NAME']
192 fullurl += env['SERVER_NAME']
220 fullurl = addport(fullurl, env['SERVER_PORT'])
193 fullurl = addport(fullurl, env['SERVER_PORT'])
221
194
222 if altbaseurl and altbaseurl.host:
195 if altbaseurl and altbaseurl.host:
223 advertisedfullurl += altbaseurl.host
196 advertisedfullurl += altbaseurl.host
224
197
225 if altbaseurl.port:
198 if altbaseurl.port:
226 port = altbaseurl.port
199 port = altbaseurl.port
227 elif altbaseurl.scheme == 'http' and not altbaseurl.port:
200 elif altbaseurl.scheme == 'http' and not altbaseurl.port:
228 port = '80'
201 port = '80'
229 elif altbaseurl.scheme == 'https' and not altbaseurl.port:
202 elif altbaseurl.scheme == 'https' and not altbaseurl.port:
230 port = '443'
203 port = '443'
231 else:
204 else:
232 port = env['SERVER_PORT']
205 port = env['SERVER_PORT']
233
206
234 advertisedfullurl = addport(advertisedfullurl, port)
207 advertisedfullurl = addport(advertisedfullurl, port)
235 else:
208 else:
236 advertisedfullurl += env['SERVER_NAME']
209 advertisedfullurl += env['SERVER_NAME']
237 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
210 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
238
211
239 baseurl = fullurl
212 baseurl = fullurl
240 advertisedbaseurl = advertisedfullurl
213 advertisedbaseurl = advertisedfullurl
241
214
242 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
215 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
243 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
216 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
244
217
245 if altbaseurl:
218 if altbaseurl:
246 path = altbaseurl.path or ''
219 path = altbaseurl.path or ''
247 if path and not path.startswith('/'):
220 if path and not path.startswith('/'):
248 path = '/' + path
221 path = '/' + path
249 advertisedfullurl += util.urlreq.quote(path)
222 advertisedfullurl += util.urlreq.quote(path)
250 else:
223 else:
251 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
224 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
252
225
253 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
226 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
254
227
255 if env.get('QUERY_STRING'):
228 if env.get('QUERY_STRING'):
256 fullurl += '?' + env['QUERY_STRING']
229 fullurl += '?' + env['QUERY_STRING']
257 advertisedfullurl += '?' + env['QUERY_STRING']
230 advertisedfullurl += '?' + env['QUERY_STRING']
258
231
259 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
232 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
260 # that represents the repository being dispatched to. When computing
233 # that represents the repository being dispatched to. When computing
261 # the dispatch info, we ignore these leading path components.
234 # the dispatch info, we ignore these leading path components.
262
235
263 if altbaseurl:
236 if altbaseurl:
264 apppath = altbaseurl.path or ''
237 apppath = altbaseurl.path or ''
265 if apppath and not apppath.startswith('/'):
238 if apppath and not apppath.startswith('/'):
266 apppath = '/' + apppath
239 apppath = '/' + apppath
267 else:
240 else:
268 apppath = env.get('SCRIPT_NAME', '')
241 apppath = env.get('SCRIPT_NAME', '')
269
242
270 if reponame:
243 if reponame:
271 repoprefix = '/' + reponame.strip('/')
244 repoprefix = '/' + reponame.strip('/')
272
245
273 if not env.get('PATH_INFO'):
246 if not env.get('PATH_INFO'):
274 raise error.ProgrammingError('reponame requires PATH_INFO')
247 raise error.ProgrammingError('reponame requires PATH_INFO')
275
248
276 if not env['PATH_INFO'].startswith(repoprefix):
249 if not env['PATH_INFO'].startswith(repoprefix):
277 raise error.ProgrammingError('PATH_INFO does not begin with repo '
250 raise error.ProgrammingError('PATH_INFO does not begin with repo '
278 'name: %s (%s)' % (env['PATH_INFO'],
251 'name: %s (%s)' % (env['PATH_INFO'],
279 reponame))
252 reponame))
280
253
281 dispatchpath = env['PATH_INFO'][len(repoprefix):]
254 dispatchpath = env['PATH_INFO'][len(repoprefix):]
282
255
283 if dispatchpath and not dispatchpath.startswith('/'):
256 if dispatchpath and not dispatchpath.startswith('/'):
284 raise error.ProgrammingError('reponame prefix of PATH_INFO does '
257 raise error.ProgrammingError('reponame prefix of PATH_INFO does '
285 'not end at path delimiter: %s (%s)' %
258 'not end at path delimiter: %s (%s)' %
286 (env['PATH_INFO'], reponame))
259 (env['PATH_INFO'], reponame))
287
260
288 apppath = apppath.rstrip('/') + repoprefix
261 apppath = apppath.rstrip('/') + repoprefix
289 dispatchparts = dispatchpath.strip('/').split('/')
262 dispatchparts = dispatchpath.strip('/').split('/')
290 dispatchpath = '/'.join(dispatchparts)
263 dispatchpath = '/'.join(dispatchparts)
291
264
292 elif 'PATH_INFO' in env:
265 elif 'PATH_INFO' in env:
293 if env['PATH_INFO'].strip('/'):
266 if env['PATH_INFO'].strip('/'):
294 dispatchparts = env['PATH_INFO'].strip('/').split('/')
267 dispatchparts = env['PATH_INFO'].strip('/').split('/')
295 dispatchpath = '/'.join(dispatchparts)
268 dispatchpath = '/'.join(dispatchparts)
296 else:
269 else:
297 dispatchparts = []
270 dispatchparts = []
298 dispatchpath = ''
271 dispatchpath = ''
299 else:
272 else:
300 dispatchparts = []
273 dispatchparts = []
301 dispatchpath = None
274 dispatchpath = None
302
275
303 querystring = env.get('QUERY_STRING', '')
276 querystring = env.get('QUERY_STRING', '')
304
277
305 # We store as a list so we have ordering information. We also store as
278 # We store as a list so we have ordering information. We also store as
306 # a dict to facilitate fast lookup.
279 # a dict to facilitate fast lookup.
307 qsparams = multidict()
280 qsparams = multidict()
308 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
281 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
309 qsparams.add(k, v)
282 qsparams.add(k, v)
310
283
311 # HTTP_* keys contain HTTP request headers. The Headers structure should
284 # HTTP_* keys contain HTTP request headers. The Headers structure should
312 # perform case normalization for us. We just rewrite underscore to dash
285 # perform case normalization for us. We just rewrite underscore to dash
313 # so keys match what likely went over the wire.
286 # so keys match what likely went over the wire.
314 headers = []
287 headers = []
315 for k, v in env.iteritems():
288 for k, v in env.iteritems():
316 if k.startswith('HTTP_'):
289 if k.startswith('HTTP_'):
317 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
290 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
318
291
319 headers = wsgiheaders.Headers(headers)
292 headers = wsgiheaders.Headers(headers)
320
293
321 # This is kind of a lie because the HTTP header wasn't explicitly
294 # This is kind of a lie because the HTTP header wasn't explicitly
322 # sent. But for all intents and purposes it should be OK to lie about
295 # sent. But for all intents and purposes it should be OK to lie about
323 # this, since a consumer will use either value to determine how many
296 # this, since a consumer will use either value to determine how many
324 # bytes are available to read.
297 # bytes are available to read.
325 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
298 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
326 headers['Content-Length'] = env['CONTENT_LENGTH']
299 headers['Content-Length'] = env['CONTENT_LENGTH']
327
300
328 bodyfh = env['wsgi.input']
301 bodyfh = env['wsgi.input']
329 if 'Content-Length' in headers:
302 if 'Content-Length' in headers:
330 bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
303 bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
331
304
332 return parsedrequest(method=env['REQUEST_METHOD'],
305 return parsedrequest(method=env['REQUEST_METHOD'],
333 url=fullurl, baseurl=baseurl,
306 url=fullurl, baseurl=baseurl,
334 advertisedurl=advertisedfullurl,
307 advertisedurl=advertisedfullurl,
335 advertisedbaseurl=advertisedbaseurl,
308 advertisedbaseurl=advertisedbaseurl,
336 urlscheme=env['wsgi.url_scheme'],
309 urlscheme=env['wsgi.url_scheme'],
337 remoteuser=env.get('REMOTE_USER'),
310 remoteuser=env.get('REMOTE_USER'),
338 remotehost=env.get('REMOTE_HOST'),
311 remotehost=env.get('REMOTE_HOST'),
339 apppath=apppath,
312 apppath=apppath,
340 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
313 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
341 reponame=reponame,
314 reponame=reponame,
342 querystring=querystring,
315 querystring=querystring,
343 qsparams=qsparams,
316 qsparams=qsparams,
344 headers=headers,
317 headers=headers,
345 bodyfh=bodyfh,
318 bodyfh=bodyfh,
346 rawenv=env)
319 rawenv=env)
347
320
348 class offsettrackingwriter(object):
321 class offsettrackingwriter(object):
349 """A file object like object that is append only and tracks write count.
322 """A file object like object that is append only and tracks write count.
350
323
351 Instances are bound to a callable. This callable is called with data
324 Instances are bound to a callable. This callable is called with data
352 whenever a ``write()`` is attempted.
325 whenever a ``write()`` is attempted.
353
326
354 Instances track the amount of written data so they can answer ``tell()``
327 Instances track the amount of written data so they can answer ``tell()``
355 requests.
328 requests.
356
329
357 The intent of this class is to wrap the ``write()`` function returned by
330 The intent of this class is to wrap the ``write()`` function returned by
358 a WSGI ``start_response()`` function. Since ``write()`` is a callable and
331 a WSGI ``start_response()`` function. Since ``write()`` is a callable and
359 not a file object, it doesn't implement other file object methods.
332 not a file object, it doesn't implement other file object methods.
360 """
333 """
361 def __init__(self, writefn):
334 def __init__(self, writefn):
362 self._write = writefn
335 self._write = writefn
363 self._offset = 0
336 self._offset = 0
364
337
365 def write(self, s):
338 def write(self, s):
366 res = self._write(s)
339 res = self._write(s)
367 # Some Python objects don't report the number of bytes written.
340 # Some Python objects don't report the number of bytes written.
368 if res is None:
341 if res is None:
369 self._offset += len(s)
342 self._offset += len(s)
370 else:
343 else:
371 self._offset += res
344 self._offset += res
372
345
373 def flush(self):
346 def flush(self):
374 pass
347 pass
375
348
376 def tell(self):
349 def tell(self):
377 return self._offset
350 return self._offset
378
351
379 class wsgiresponse(object):
352 class wsgiresponse(object):
380 """Represents a response to a WSGI request.
353 """Represents a response to a WSGI request.
381
354
382 A response consists of a status line, headers, and a body.
355 A response consists of a status line, headers, and a body.
383
356
384 Consumers must populate the ``status`` and ``headers`` fields and
357 Consumers must populate the ``status`` and ``headers`` fields and
385 make a call to a ``setbody*()`` method before the response can be
358 make a call to a ``setbody*()`` method before the response can be
386 issued.
359 issued.
387
360
388 When it is time to start sending the response over the wire,
361 When it is time to start sending the response over the wire,
389 ``sendresponse()`` is called. It handles emitting the header portion
362 ``sendresponse()`` is called. It handles emitting the header portion
390 of the response message. It then yields chunks of body data to be
363 of the response message. It then yields chunks of body data to be
391 written to the peer. Typically, the WSGI application itself calls
364 written to the peer. Typically, the WSGI application itself calls
392 and returns the value from ``sendresponse()``.
365 and returns the value from ``sendresponse()``.
393 """
366 """
394
367
395 def __init__(self, req, startresponse):
368 def __init__(self, req, startresponse):
396 """Create an empty response tied to a specific request.
369 """Create an empty response tied to a specific request.
397
370
398 ``req`` is a ``parsedrequest``. ``startresponse`` is the
371 ``req`` is a ``parsedrequest``. ``startresponse`` is the
399 ``start_response`` function passed to the WSGI application.
372 ``start_response`` function passed to the WSGI application.
400 """
373 """
401 self._req = req
374 self._req = req
402 self._startresponse = startresponse
375 self._startresponse = startresponse
403
376
404 self.status = None
377 self.status = None
405 self.headers = wsgiheaders.Headers([])
378 self.headers = wsgiheaders.Headers([])
406
379
407 self._bodybytes = None
380 self._bodybytes = None
408 self._bodygen = None
381 self._bodygen = None
409 self._bodywillwrite = False
382 self._bodywillwrite = False
410 self._started = False
383 self._started = False
411 self._bodywritefn = None
384 self._bodywritefn = None
412
385
413 def _verifybody(self):
386 def _verifybody(self):
414 if (self._bodybytes is not None or self._bodygen is not None
387 if (self._bodybytes is not None or self._bodygen is not None
415 or self._bodywillwrite):
388 or self._bodywillwrite):
416 raise error.ProgrammingError('cannot define body multiple times')
389 raise error.ProgrammingError('cannot define body multiple times')
417
390
418 def setbodybytes(self, b):
391 def setbodybytes(self, b):
419 """Define the response body as static bytes.
392 """Define the response body as static bytes.
420
393
421 The empty string signals that there is no response body.
394 The empty string signals that there is no response body.
422 """
395 """
423 self._verifybody()
396 self._verifybody()
424 self._bodybytes = b
397 self._bodybytes = b
425 self.headers['Content-Length'] = '%d' % len(b)
398 self.headers['Content-Length'] = '%d' % len(b)
426
399
427 def setbodygen(self, gen):
400 def setbodygen(self, gen):
428 """Define the response body as a generator of bytes."""
401 """Define the response body as a generator of bytes."""
429 self._verifybody()
402 self._verifybody()
430 self._bodygen = gen
403 self._bodygen = gen
431
404
432 def setbodywillwrite(self):
405 def setbodywillwrite(self):
433 """Signal an intent to use write() to emit the response body.
406 """Signal an intent to use write() to emit the response body.
434
407
435 **This is the least preferred way to send a body.**
408 **This is the least preferred way to send a body.**
436
409
437 It is preferred for WSGI applications to emit a generator of chunks
410 It is preferred for WSGI applications to emit a generator of chunks
438 constituting the response body. However, some consumers can't emit
411 constituting the response body. However, some consumers can't emit
439 data this way. So, WSGI provides a way to obtain a ``write(data)``
412 data this way. So, WSGI provides a way to obtain a ``write(data)``
440 function that can be used to synchronously perform an unbuffered
413 function that can be used to synchronously perform an unbuffered
441 write.
414 write.
442
415
443 Calling this function signals an intent to produce the body in this
416 Calling this function signals an intent to produce the body in this
444 manner.
417 manner.
445 """
418 """
446 self._verifybody()
419 self._verifybody()
447 self._bodywillwrite = True
420 self._bodywillwrite = True
448
421
449 def sendresponse(self):
422 def sendresponse(self):
450 """Send the generated response to the client.
423 """Send the generated response to the client.
451
424
452 Before this is called, ``status`` must be set and one of
425 Before this is called, ``status`` must be set and one of
453 ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must be called.
426 ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must be called.
454
427
455 Calling this method multiple times is not allowed.
428 Calling this method multiple times is not allowed.
456 """
429 """
457 if self._started:
430 if self._started:
458 raise error.ProgrammingError('sendresponse() called multiple times')
431 raise error.ProgrammingError('sendresponse() called multiple times')
459
432
460 self._started = True
433 self._started = True
461
434
462 if not self.status:
435 if not self.status:
463 raise error.ProgrammingError('status line not defined')
436 raise error.ProgrammingError('status line not defined')
464
437
465 if (self._bodybytes is None and self._bodygen is None
438 if (self._bodybytes is None and self._bodygen is None
466 and not self._bodywillwrite):
439 and not self._bodywillwrite):
467 raise error.ProgrammingError('response body not defined')
440 raise error.ProgrammingError('response body not defined')
468
441
469 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
442 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
470 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
443 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
471 # and SHOULD NOT generate other headers unless they could be used
444 # and SHOULD NOT generate other headers unless they could be used
472 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
445 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
473 # states that no response body can be issued. Content-Length can
446 # states that no response body can be issued. Content-Length can
474 # be sent. But if it is present, it should be the size of the response
447 # be sent. But if it is present, it should be the size of the response
475 # that wasn't transferred.
448 # that wasn't transferred.
476 if self.status.startswith('304 '):
449 if self.status.startswith('304 '):
477 # setbodybytes('') will set C-L to 0. This doesn't conform with the
450 # setbodybytes('') will set C-L to 0. This doesn't conform with the
478 # spec. So remove it.
451 # spec. So remove it.
479 if self.headers.get('Content-Length') == '0':
452 if self.headers.get('Content-Length') == '0':
480 del self.headers['Content-Length']
453 del self.headers['Content-Length']
481
454
482 # Strictly speaking, this is too strict. But until it causes
455 # Strictly speaking, this is too strict. But until it causes
483 # problems, let's be strict.
456 # problems, let's be strict.
484 badheaders = {k for k in self.headers.keys()
457 badheaders = {k for k in self.headers.keys()
485 if k.lower() not in ('date', 'etag', 'expires',
458 if k.lower() not in ('date', 'etag', 'expires',
486 'cache-control',
459 'cache-control',
487 'content-location',
460 'content-location',
488 'vary')}
461 'vary')}
489 if badheaders:
462 if badheaders:
490 raise error.ProgrammingError(
463 raise error.ProgrammingError(
491 'illegal header on 304 response: %s' %
464 'illegal header on 304 response: %s' %
492 ', '.join(sorted(badheaders)))
465 ', '.join(sorted(badheaders)))
493
466
494 if self._bodygen is not None or self._bodywillwrite:
467 if self._bodygen is not None or self._bodywillwrite:
495 raise error.ProgrammingError("must use setbodybytes('') with "
468 raise error.ProgrammingError("must use setbodybytes('') with "
496 "304 responses")
469 "304 responses")
497
470
498 # Various HTTP clients (notably httplib) won't read the HTTP response
471 # Various HTTP clients (notably httplib) won't read the HTTP response
499 # until the HTTP request has been sent in full. If servers (us) send a
472 # until the HTTP request has been sent in full. If servers (us) send a
500 # response before the HTTP request has been fully sent, the connection
473 # response before the HTTP request has been fully sent, the connection
501 # may deadlock because neither end is reading.
474 # may deadlock because neither end is reading.
502 #
475 #
503 # We work around this by "draining" the request data before
476 # We work around this by "draining" the request data before
504 # sending any response in some conditions.
477 # sending any response in some conditions.
505 drain = False
478 drain = False
506 close = False
479 close = False
507
480
508 # If the client sent Expect: 100-continue, we assume it is smart enough
481 # If the client sent Expect: 100-continue, we assume it is smart enough
509 # to deal with the server sending a response before reading the request.
482 # to deal with the server sending a response before reading the request.
510 # (httplib doesn't do this.)
483 # (httplib doesn't do this.)
511 if self._req.headers.get('Expect', '').lower() == '100-continue':
484 if self._req.headers.get('Expect', '').lower() == '100-continue':
512 pass
485 pass
513 # Only tend to request methods that have bodies. Strictly speaking,
486 # Only tend to request methods that have bodies. Strictly speaking,
514 # we should sniff for a body. But this is fine for our existing
487 # we should sniff for a body. But this is fine for our existing
515 # WSGI applications.
488 # WSGI applications.
516 elif self._req.method not in ('POST', 'PUT'):
489 elif self._req.method not in ('POST', 'PUT'):
517 pass
490 pass
518 else:
491 else:
519 # If we don't know how much data to read, there's no guarantee
492 # If we don't know how much data to read, there's no guarantee
520 # that we can drain the request responsibly. The WSGI
493 # that we can drain the request responsibly. The WSGI
521 # specification only says that servers *should* ensure the
494 # specification only says that servers *should* ensure the
522 # input stream doesn't overrun the actual request. So there's
495 # input stream doesn't overrun the actual request. So there's
523 # no guarantee that reading until EOF won't corrupt the stream
496 # no guarantee that reading until EOF won't corrupt the stream
524 # state.
497 # state.
525 if not isinstance(self._req.bodyfh, util.cappedreader):
498 if not isinstance(self._req.bodyfh, util.cappedreader):
526 close = True
499 close = True
527 else:
500 else:
528 # We /could/ only drain certain HTTP response codes. But 200 and
501 # We /could/ only drain certain HTTP response codes. But 200 and
529 # non-200 wire protocol responses both require draining. Since
502 # non-200 wire protocol responses both require draining. Since
530 # we have a capped reader in place for all situations where we
503 # we have a capped reader in place for all situations where we
531 # drain, it is safe to read from that stream. We'll either do
504 # drain, it is safe to read from that stream. We'll either do
532 # a drain or no-op if we're already at EOF.
505 # a drain or no-op if we're already at EOF.
533 drain = True
506 drain = True
534
507
535 if close:
508 if close:
536 self.headers['Connection'] = 'Close'
509 self.headers['Connection'] = 'Close'
537
510
538 if drain:
511 if drain:
539 assert isinstance(self._req.bodyfh, util.cappedreader)
512 assert isinstance(self._req.bodyfh, util.cappedreader)
540 while True:
513 while True:
541 chunk = self._req.bodyfh.read(32768)
514 chunk = self._req.bodyfh.read(32768)
542 if not chunk:
515 if not chunk:
543 break
516 break
544
517
545 write = self._startresponse(pycompat.sysstr(self.status),
518 write = self._startresponse(pycompat.sysstr(self.status),
546 self.headers.items())
519 self.headers.items())
547
520
548 if self._bodybytes:
521 if self._bodybytes:
549 yield self._bodybytes
522 yield self._bodybytes
550 elif self._bodygen:
523 elif self._bodygen:
551 for chunk in self._bodygen:
524 for chunk in self._bodygen:
552 yield chunk
525 yield chunk
553 elif self._bodywillwrite:
526 elif self._bodywillwrite:
554 self._bodywritefn = write
527 self._bodywritefn = write
555 else:
528 else:
556 error.ProgrammingError('do not know how to send body')
529 error.ProgrammingError('do not know how to send body')
557
530
def getbodyfile(self):
    """Return a file-like writer for emitting the response body.

    This is only usable after ``setbodywillwrite()`` has been called and
    ``sendresponse()`` has been run. Because ``sendresponse()`` is a
    generator, none of its code executes until it is advanced. In this
    mode it yields no items, so ``list(res.sendresponse())`` - which
    evaluates to ``[]`` - is the easiest way to drive it to completion.

    Raises ``error.ProgrammingError`` if either precondition is not met.
    """
    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    if not self._started:
        raise error.ProgrammingError(
            'must call sendresponse() first; did you remember to consume '
            'it since it is a generator?')

    # sendresponse() stores the callable returned by ``_startresponse`` in
    # ``_bodywritefn`` when the body is going to be written directly.
    writefn = self._bodywritefn
    assert writefn
    return offsettrackingwriter(writefn)
578
551
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts. A plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application.

    ``app_maker`` is called once, immediately, with no arguments. The
    returned function forwards ``(env, respond)`` to the object the factory
    produced and returns the result of that call.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        return wsgiapp(env, respond)

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now