##// END OF EJS Templates
hgweb: fallback to checking wsgireq.env for REPO_NAME for 3rd party hosting...
Matt Harbison -
r37634:5e81cf96 default
parent child Browse files
Show More
@@ -1,564 +1,570
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 #import wsgiref.validate
11 #import wsgiref.validate
12
12
13 from ..thirdparty import (
13 from ..thirdparty import (
14 attr,
14 attr,
15 )
15 )
16 from .. import (
16 from .. import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
22 class multidict(object):
22 class multidict(object):
23 """A dict like object that can store multiple values for a key.
23 """A dict like object that can store multiple values for a key.
24
24
25 Used to store parsed request parameters.
25 Used to store parsed request parameters.
26
26
27 This is inspired by WebOb's class of the same name.
27 This is inspired by WebOb's class of the same name.
28 """
28 """
29 def __init__(self):
29 def __init__(self):
30 self._items = {}
30 self._items = {}
31
31
32 def __getitem__(self, key):
32 def __getitem__(self, key):
33 """Returns the last set value for a key."""
33 """Returns the last set value for a key."""
34 return self._items[key][-1]
34 return self._items[key][-1]
35
35
36 def __setitem__(self, key, value):
36 def __setitem__(self, key, value):
37 """Replace all values for a key with a new value."""
37 """Replace all values for a key with a new value."""
38 self._items[key] = [value]
38 self._items[key] = [value]
39
39
40 def __delitem__(self, key):
40 def __delitem__(self, key):
41 """Delete all values for a key."""
41 """Delete all values for a key."""
42 del self._items[key]
42 del self._items[key]
43
43
44 def __contains__(self, key):
44 def __contains__(self, key):
45 return key in self._items
45 return key in self._items
46
46
47 def __len__(self):
47 def __len__(self):
48 return len(self._items)
48 return len(self._items)
49
49
50 def get(self, key, default=None):
50 def get(self, key, default=None):
51 try:
51 try:
52 return self.__getitem__(key)
52 return self.__getitem__(key)
53 except KeyError:
53 except KeyError:
54 return default
54 return default
55
55
56 def add(self, key, value):
56 def add(self, key, value):
57 """Add a new value for a key. Does not replace existing values."""
57 """Add a new value for a key. Does not replace existing values."""
58 self._items.setdefault(key, []).append(value)
58 self._items.setdefault(key, []).append(value)
59
59
60 def getall(self, key):
60 def getall(self, key):
61 """Obtains all values for a key."""
61 """Obtains all values for a key."""
62 return self._items.get(key, [])
62 return self._items.get(key, [])
63
63
64 def getone(self, key):
64 def getone(self, key):
65 """Obtain a single value for a key.
65 """Obtain a single value for a key.
66
66
67 Raises KeyError if key not defined or it has multiple values set.
67 Raises KeyError if key not defined or it has multiple values set.
68 """
68 """
69 vals = self._items[key]
69 vals = self._items[key]
70
70
71 if len(vals) > 1:
71 if len(vals) > 1:
72 raise KeyError('multiple values for %r' % key)
72 raise KeyError('multiple values for %r' % key)
73
73
74 return vals[0]
74 return vals[0]
75
75
76 def asdictoflists(self):
76 def asdictoflists(self):
77 return {k: list(v) for k, v in self._items.iteritems()}
77 return {k: list(v) for k, v in self._items.iteritems()}
78
78
79 @attr.s(frozen=True)
79 @attr.s(frozen=True)
80 class parsedrequest(object):
80 class parsedrequest(object):
81 """Represents a parsed WSGI request.
81 """Represents a parsed WSGI request.
82
82
83 Contains both parsed parameters as well as a handle on the input stream.
83 Contains both parsed parameters as well as a handle on the input stream.
84 """
84 """
85
85
86 # Request method.
86 # Request method.
87 method = attr.ib()
87 method = attr.ib()
88 # Full URL for this request.
88 # Full URL for this request.
89 url = attr.ib()
89 url = attr.ib()
90 # URL without any path components. Just <proto>://<host><port>.
90 # URL without any path components. Just <proto>://<host><port>.
91 baseurl = attr.ib()
91 baseurl = attr.ib()
92 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
92 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
93 # of HTTP: Host header for hostname. This is likely what clients used.
93 # of HTTP: Host header for hostname. This is likely what clients used.
94 advertisedurl = attr.ib()
94 advertisedurl = attr.ib()
95 advertisedbaseurl = attr.ib()
95 advertisedbaseurl = attr.ib()
96 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
96 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
97 urlscheme = attr.ib()
97 urlscheme = attr.ib()
98 # Value of REMOTE_USER, if set, or None.
98 # Value of REMOTE_USER, if set, or None.
99 remoteuser = attr.ib()
99 remoteuser = attr.ib()
100 # Value of REMOTE_HOST, if set, or None.
100 # Value of REMOTE_HOST, if set, or None.
101 remotehost = attr.ib()
101 remotehost = attr.ib()
102 # Relative WSGI application path. If defined, will begin with a
102 # Relative WSGI application path. If defined, will begin with a
103 # ``/``.
103 # ``/``.
104 apppath = attr.ib()
104 apppath = attr.ib()
105 # List of path parts to be used for dispatch.
105 # List of path parts to be used for dispatch.
106 dispatchparts = attr.ib()
106 dispatchparts = attr.ib()
107 # URL path component (no query string) used for dispatch. Can be
107 # URL path component (no query string) used for dispatch. Can be
108 # ``None`` to signal no path component given to the request, an
108 # ``None`` to signal no path component given to the request, an
109 # empty string to signal a request to the application's root URL,
109 # empty string to signal a request to the application's root URL,
110 # or a string not beginning with ``/`` containing the requested
110 # or a string not beginning with ``/`` containing the requested
111 # path under the application.
111 # path under the application.
112 dispatchpath = attr.ib()
112 dispatchpath = attr.ib()
113 # The name of the repository being accessed.
113 # The name of the repository being accessed.
114 reponame = attr.ib()
114 reponame = attr.ib()
115 # Raw query string (part after "?" in URL).
115 # Raw query string (part after "?" in URL).
116 querystring = attr.ib()
116 querystring = attr.ib()
117 # multidict of query string parameters.
117 # multidict of query string parameters.
118 qsparams = attr.ib()
118 qsparams = attr.ib()
119 # wsgiref.headers.Headers instance. Operates like a dict with case
119 # wsgiref.headers.Headers instance. Operates like a dict with case
120 # insensitive keys.
120 # insensitive keys.
121 headers = attr.ib()
121 headers = attr.ib()
122 # Request body input stream.
122 # Request body input stream.
123 bodyfh = attr.ib()
123 bodyfh = attr.ib()
124 # WSGI environment dict, unmodified.
124 # WSGI environment dict, unmodified.
125 rawenv = attr.ib()
125 rawenv = attr.ib()
126
126
127 def parserequestfromenv(env, reponame=None, altbaseurl=None):
127 def parserequestfromenv(env, reponame=None, altbaseurl=None):
128 """Parse URL components from environment variables.
128 """Parse URL components from environment variables.
129
129
130 WSGI defines request attributes via environment variables. This function
130 WSGI defines request attributes via environment variables. This function
131 parses the environment variables into a data structure.
131 parses the environment variables into a data structure.
132
132
133 If ``reponame`` is defined, the leading path components matching that
133 If ``reponame`` is defined, the leading path components matching that
134 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
134 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
135 This simulates the world view of a WSGI application that processes
135 This simulates the world view of a WSGI application that processes
136 requests from the base URL of a repo.
136 requests from the base URL of a repo.
137
137
138 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
138 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
139 is defined, it is used - instead of the WSGI environment variables - for
139 is defined, it is used - instead of the WSGI environment variables - for
140 constructing URL components up to and including the WSGI application path.
140 constructing URL components up to and including the WSGI application path.
141 For example, if the current WSGI application is at ``/repo`` and a request
141 For example, if the current WSGI application is at ``/repo`` and a request
142 is made to ``/rev/@`` with this argument set to
142 is made to ``/rev/@`` with this argument set to
143 ``http://myserver:9000/prefix``, the URL and path components will resolve as
143 ``http://myserver:9000/prefix``, the URL and path components will resolve as
144 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
144 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
145 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
145 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
146 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.
146 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.
147 """
147 """
148 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
148 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
149
149
150 # We first validate that the incoming object conforms with the WSGI spec.
150 # We first validate that the incoming object conforms with the WSGI spec.
151 # We only want to be dealing with spec-conforming WSGI implementations.
151 # We only want to be dealing with spec-conforming WSGI implementations.
152 # TODO enable this once we fix internal violations.
152 # TODO enable this once we fix internal violations.
153 #wsgiref.validate.check_environ(env)
153 #wsgiref.validate.check_environ(env)
154
154
155 # PEP-0333 states that environment keys and values are native strings
155 # PEP-0333 states that environment keys and values are native strings
156 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
156 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
157 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
157 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
158 # in Mercurial, so mass convert string keys and values to bytes.
158 # in Mercurial, so mass convert string keys and values to bytes.
159 if pycompat.ispy3:
159 if pycompat.ispy3:
160 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
160 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
161 env = {k: v.encode('latin-1') if isinstance(v, str) else v
161 env = {k: v.encode('latin-1') if isinstance(v, str) else v
162 for k, v in env.iteritems()}
162 for k, v in env.iteritems()}
163
163
164 # Some hosting solutions are emulating hgwebdir, and dispatching directly
165 # to an hgweb instance using this environment variable. This was always
166 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
167 if not reponame:
168 reponame = env.get('REPO_NAME')
169
164 if altbaseurl:
170 if altbaseurl:
165 altbaseurl = util.url(altbaseurl)
171 altbaseurl = util.url(altbaseurl)
166
172
167 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
173 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
168 # the environment variables.
174 # the environment variables.
169 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
175 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
170 # how URLs are reconstructed.
176 # how URLs are reconstructed.
171 fullurl = env['wsgi.url_scheme'] + '://'
177 fullurl = env['wsgi.url_scheme'] + '://'
172
178
173 if altbaseurl and altbaseurl.scheme:
179 if altbaseurl and altbaseurl.scheme:
174 advertisedfullurl = altbaseurl.scheme + '://'
180 advertisedfullurl = altbaseurl.scheme + '://'
175 else:
181 else:
176 advertisedfullurl = fullurl
182 advertisedfullurl = fullurl
177
183
178 def addport(s, port):
184 def addport(s, port):
179 if s.startswith('https://'):
185 if s.startswith('https://'):
180 if port != '443':
186 if port != '443':
181 s += ':' + port
187 s += ':' + port
182 else:
188 else:
183 if port != '80':
189 if port != '80':
184 s += ':' + port
190 s += ':' + port
185
191
186 return s
192 return s
187
193
188 if env.get('HTTP_HOST'):
194 if env.get('HTTP_HOST'):
189 fullurl += env['HTTP_HOST']
195 fullurl += env['HTTP_HOST']
190 else:
196 else:
191 fullurl += env['SERVER_NAME']
197 fullurl += env['SERVER_NAME']
192 fullurl = addport(fullurl, env['SERVER_PORT'])
198 fullurl = addport(fullurl, env['SERVER_PORT'])
193
199
194 if altbaseurl and altbaseurl.host:
200 if altbaseurl and altbaseurl.host:
195 advertisedfullurl += altbaseurl.host
201 advertisedfullurl += altbaseurl.host
196
202
197 if altbaseurl.port:
203 if altbaseurl.port:
198 port = altbaseurl.port
204 port = altbaseurl.port
199 elif altbaseurl.scheme == 'http' and not altbaseurl.port:
205 elif altbaseurl.scheme == 'http' and not altbaseurl.port:
200 port = '80'
206 port = '80'
201 elif altbaseurl.scheme == 'https' and not altbaseurl.port:
207 elif altbaseurl.scheme == 'https' and not altbaseurl.port:
202 port = '443'
208 port = '443'
203 else:
209 else:
204 port = env['SERVER_PORT']
210 port = env['SERVER_PORT']
205
211
206 advertisedfullurl = addport(advertisedfullurl, port)
212 advertisedfullurl = addport(advertisedfullurl, port)
207 else:
213 else:
208 advertisedfullurl += env['SERVER_NAME']
214 advertisedfullurl += env['SERVER_NAME']
209 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
215 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
210
216
211 baseurl = fullurl
217 baseurl = fullurl
212 advertisedbaseurl = advertisedfullurl
218 advertisedbaseurl = advertisedfullurl
213
219
214 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
220 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
215 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
221 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
216
222
217 if altbaseurl:
223 if altbaseurl:
218 path = altbaseurl.path or ''
224 path = altbaseurl.path or ''
219 if path and not path.startswith('/'):
225 if path and not path.startswith('/'):
220 path = '/' + path
226 path = '/' + path
221 advertisedfullurl += util.urlreq.quote(path)
227 advertisedfullurl += util.urlreq.quote(path)
222 else:
228 else:
223 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
229 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
224
230
225 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
231 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
226
232
227 if env.get('QUERY_STRING'):
233 if env.get('QUERY_STRING'):
228 fullurl += '?' + env['QUERY_STRING']
234 fullurl += '?' + env['QUERY_STRING']
229 advertisedfullurl += '?' + env['QUERY_STRING']
235 advertisedfullurl += '?' + env['QUERY_STRING']
230
236
231 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
237 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
232 # that represents the repository being dispatched to. When computing
238 # that represents the repository being dispatched to. When computing
233 # the dispatch info, we ignore these leading path components.
239 # the dispatch info, we ignore these leading path components.
234
240
235 if altbaseurl:
241 if altbaseurl:
236 apppath = altbaseurl.path or ''
242 apppath = altbaseurl.path or ''
237 if apppath and not apppath.startswith('/'):
243 if apppath and not apppath.startswith('/'):
238 apppath = '/' + apppath
244 apppath = '/' + apppath
239 else:
245 else:
240 apppath = env.get('SCRIPT_NAME', '')
246 apppath = env.get('SCRIPT_NAME', '')
241
247
242 if reponame:
248 if reponame:
243 repoprefix = '/' + reponame.strip('/')
249 repoprefix = '/' + reponame.strip('/')
244
250
245 if not env.get('PATH_INFO'):
251 if not env.get('PATH_INFO'):
246 raise error.ProgrammingError('reponame requires PATH_INFO')
252 raise error.ProgrammingError('reponame requires PATH_INFO')
247
253
248 if not env['PATH_INFO'].startswith(repoprefix):
254 if not env['PATH_INFO'].startswith(repoprefix):
249 raise error.ProgrammingError('PATH_INFO does not begin with repo '
255 raise error.ProgrammingError('PATH_INFO does not begin with repo '
250 'name: %s (%s)' % (env['PATH_INFO'],
256 'name: %s (%s)' % (env['PATH_INFO'],
251 reponame))
257 reponame))
252
258
253 dispatchpath = env['PATH_INFO'][len(repoprefix):]
259 dispatchpath = env['PATH_INFO'][len(repoprefix):]
254
260
255 if dispatchpath and not dispatchpath.startswith('/'):
261 if dispatchpath and not dispatchpath.startswith('/'):
256 raise error.ProgrammingError('reponame prefix of PATH_INFO does '
262 raise error.ProgrammingError('reponame prefix of PATH_INFO does '
257 'not end at path delimiter: %s (%s)' %
263 'not end at path delimiter: %s (%s)' %
258 (env['PATH_INFO'], reponame))
264 (env['PATH_INFO'], reponame))
259
265
260 apppath = apppath.rstrip('/') + repoprefix
266 apppath = apppath.rstrip('/') + repoprefix
261 dispatchparts = dispatchpath.strip('/').split('/')
267 dispatchparts = dispatchpath.strip('/').split('/')
262 dispatchpath = '/'.join(dispatchparts)
268 dispatchpath = '/'.join(dispatchparts)
263
269
264 elif 'PATH_INFO' in env:
270 elif 'PATH_INFO' in env:
265 if env['PATH_INFO'].strip('/'):
271 if env['PATH_INFO'].strip('/'):
266 dispatchparts = env['PATH_INFO'].strip('/').split('/')
272 dispatchparts = env['PATH_INFO'].strip('/').split('/')
267 dispatchpath = '/'.join(dispatchparts)
273 dispatchpath = '/'.join(dispatchparts)
268 else:
274 else:
269 dispatchparts = []
275 dispatchparts = []
270 dispatchpath = ''
276 dispatchpath = ''
271 else:
277 else:
272 dispatchparts = []
278 dispatchparts = []
273 dispatchpath = None
279 dispatchpath = None
274
280
275 querystring = env.get('QUERY_STRING', '')
281 querystring = env.get('QUERY_STRING', '')
276
282
277 # We store as a list so we have ordering information. We also store as
283 # We store as a list so we have ordering information. We also store as
278 # a dict to facilitate fast lookup.
284 # a dict to facilitate fast lookup.
279 qsparams = multidict()
285 qsparams = multidict()
280 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
286 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
281 qsparams.add(k, v)
287 qsparams.add(k, v)
282
288
283 # HTTP_* keys contain HTTP request headers. The Headers structure should
289 # HTTP_* keys contain HTTP request headers. The Headers structure should
284 # perform case normalization for us. We just rewrite underscore to dash
290 # perform case normalization for us. We just rewrite underscore to dash
285 # so keys match what likely went over the wire.
291 # so keys match what likely went over the wire.
286 headers = []
292 headers = []
287 for k, v in env.iteritems():
293 for k, v in env.iteritems():
288 if k.startswith('HTTP_'):
294 if k.startswith('HTTP_'):
289 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
295 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
290
296
291 from . import wsgiheaders # avoid cycle
297 from . import wsgiheaders # avoid cycle
292 headers = wsgiheaders.Headers(headers)
298 headers = wsgiheaders.Headers(headers)
293
299
294 # This is kind of a lie because the HTTP header wasn't explicitly
300 # This is kind of a lie because the HTTP header wasn't explicitly
295 # sent. But for all intents and purposes it should be OK to lie about
301 # sent. But for all intents and purposes it should be OK to lie about
296 # this, since a consumer will use either value to determine how many
302 # this, since a consumer will use either value to determine how many
297 # bytes are available to read.
303 # bytes are available to read.
298 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
304 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
299 headers['Content-Length'] = env['CONTENT_LENGTH']
305 headers['Content-Length'] = env['CONTENT_LENGTH']
300
306
301 if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
307 if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
302 headers['Content-Type'] = env['CONTENT_TYPE']
308 headers['Content-Type'] = env['CONTENT_TYPE']
303
309
304 bodyfh = env['wsgi.input']
310 bodyfh = env['wsgi.input']
305 if 'Content-Length' in headers:
311 if 'Content-Length' in headers:
306 bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
312 bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
307
313
308 return parsedrequest(method=env['REQUEST_METHOD'],
314 return parsedrequest(method=env['REQUEST_METHOD'],
309 url=fullurl, baseurl=baseurl,
315 url=fullurl, baseurl=baseurl,
310 advertisedurl=advertisedfullurl,
316 advertisedurl=advertisedfullurl,
311 advertisedbaseurl=advertisedbaseurl,
317 advertisedbaseurl=advertisedbaseurl,
312 urlscheme=env['wsgi.url_scheme'],
318 urlscheme=env['wsgi.url_scheme'],
313 remoteuser=env.get('REMOTE_USER'),
319 remoteuser=env.get('REMOTE_USER'),
314 remotehost=env.get('REMOTE_HOST'),
320 remotehost=env.get('REMOTE_HOST'),
315 apppath=apppath,
321 apppath=apppath,
316 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
322 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
317 reponame=reponame,
323 reponame=reponame,
318 querystring=querystring,
324 querystring=querystring,
319 qsparams=qsparams,
325 qsparams=qsparams,
320 headers=headers,
326 headers=headers,
321 bodyfh=bodyfh,
327 bodyfh=bodyfh,
322 rawenv=env)
328 rawenv=env)
323
329
324 class offsettrackingwriter(object):
330 class offsettrackingwriter(object):
325 """A file object like object that is append only and tracks write count.
331 """A file object like object that is append only and tracks write count.
326
332
327 Instances are bound to a callable. This callable is called with data
333 Instances are bound to a callable. This callable is called with data
328 whenever a ``write()`` is attempted.
334 whenever a ``write()`` is attempted.
329
335
330 Instances track the amount of written data so they can answer ``tell()``
336 Instances track the amount of written data so they can answer ``tell()``
331 requests.
337 requests.
332
338
333 The intent of this class is to wrap the ``write()`` function returned by
339 The intent of this class is to wrap the ``write()`` function returned by
334 a WSGI ``start_response()`` function. Since ``write()`` is a callable and
340 a WSGI ``start_response()`` function. Since ``write()`` is a callable and
335 not a file object, it doesn't implement other file object methods.
341 not a file object, it doesn't implement other file object methods.
336 """
342 """
337 def __init__(self, writefn):
343 def __init__(self, writefn):
338 self._write = writefn
344 self._write = writefn
339 self._offset = 0
345 self._offset = 0
340
346
341 def write(self, s):
347 def write(self, s):
342 res = self._write(s)
348 res = self._write(s)
343 # Some Python objects don't report the number of bytes written.
349 # Some Python objects don't report the number of bytes written.
344 if res is None:
350 if res is None:
345 self._offset += len(s)
351 self._offset += len(s)
346 else:
352 else:
347 self._offset += res
353 self._offset += res
348
354
349 def flush(self):
355 def flush(self):
350 pass
356 pass
351
357
352 def tell(self):
358 def tell(self):
353 return self._offset
359 return self._offset
354
360
355 class wsgiresponse(object):
361 class wsgiresponse(object):
356 """Represents a response to a WSGI request.
362 """Represents a response to a WSGI request.
357
363
358 A response consists of a status line, headers, and a body.
364 A response consists of a status line, headers, and a body.
359
365
360 Consumers must populate the ``status`` and ``headers`` fields and
366 Consumers must populate the ``status`` and ``headers`` fields and
361 make a call to a ``setbody*()`` method before the response can be
367 make a call to a ``setbody*()`` method before the response can be
362 issued.
368 issued.
363
369
364 When it is time to start sending the response over the wire,
370 When it is time to start sending the response over the wire,
365 ``sendresponse()`` is called. It handles emitting the header portion
371 ``sendresponse()`` is called. It handles emitting the header portion
366 of the response message. It then yields chunks of body data to be
372 of the response message. It then yields chunks of body data to be
367 written to the peer. Typically, the WSGI application itself calls
373 written to the peer. Typically, the WSGI application itself calls
368 and returns the value from ``sendresponse()``.
374 and returns the value from ``sendresponse()``.
369 """
375 """
370
376
371 def __init__(self, req, startresponse):
377 def __init__(self, req, startresponse):
372 """Create an empty response tied to a specific request.
378 """Create an empty response tied to a specific request.
373
379
374 ``req`` is a ``parsedrequest``. ``startresponse`` is the
380 ``req`` is a ``parsedrequest``. ``startresponse`` is the
375 ``start_response`` function passed to the WSGI application.
381 ``start_response`` function passed to the WSGI application.
376 """
382 """
377 self._req = req
383 self._req = req
378 self._startresponse = startresponse
384 self._startresponse = startresponse
379
385
380 self.status = None
386 self.status = None
381 from . import wsgiheaders # avoid cycle
387 from . import wsgiheaders # avoid cycle
382 self.headers = wsgiheaders.Headers([])
388 self.headers = wsgiheaders.Headers([])
383
389
384 self._bodybytes = None
390 self._bodybytes = None
385 self._bodygen = None
391 self._bodygen = None
386 self._bodywillwrite = False
392 self._bodywillwrite = False
387 self._started = False
393 self._started = False
388 self._bodywritefn = None
394 self._bodywritefn = None
389
395
390 def _verifybody(self):
396 def _verifybody(self):
391 if (self._bodybytes is not None or self._bodygen is not None
397 if (self._bodybytes is not None or self._bodygen is not None
392 or self._bodywillwrite):
398 or self._bodywillwrite):
393 raise error.ProgrammingError('cannot define body multiple times')
399 raise error.ProgrammingError('cannot define body multiple times')
394
400
395 def setbodybytes(self, b):
401 def setbodybytes(self, b):
396 """Define the response body as static bytes.
402 """Define the response body as static bytes.
397
403
398 The empty string signals that there is no response body.
404 The empty string signals that there is no response body.
399 """
405 """
400 self._verifybody()
406 self._verifybody()
401 self._bodybytes = b
407 self._bodybytes = b
402 self.headers['Content-Length'] = '%d' % len(b)
408 self.headers['Content-Length'] = '%d' % len(b)
403
409
404 def setbodygen(self, gen):
410 def setbodygen(self, gen):
405 """Define the response body as a generator of bytes."""
411 """Define the response body as a generator of bytes."""
406 self._verifybody()
412 self._verifybody()
407 self._bodygen = gen
413 self._bodygen = gen
408
414
409 def setbodywillwrite(self):
415 def setbodywillwrite(self):
410 """Signal an intent to use write() to emit the response body.
416 """Signal an intent to use write() to emit the response body.
411
417
412 **This is the least preferred way to send a body.**
418 **This is the least preferred way to send a body.**
413
419
414 It is preferred for WSGI applications to emit a generator of chunks
420 It is preferred for WSGI applications to emit a generator of chunks
415 constituting the response body. However, some consumers can't emit
421 constituting the response body. However, some consumers can't emit
416 data this way. So, WSGI provides a way to obtain a ``write(data)``
422 data this way. So, WSGI provides a way to obtain a ``write(data)``
417 function that can be used to synchronously perform an unbuffered
423 function that can be used to synchronously perform an unbuffered
418 write.
424 write.
419
425
420 Calling this function signals an intent to produce the body in this
426 Calling this function signals an intent to produce the body in this
421 manner.
427 manner.
422 """
428 """
423 self._verifybody()
429 self._verifybody()
424 self._bodywillwrite = True
430 self._bodywillwrite = True
425
431
426 def sendresponse(self):
432 def sendresponse(self):
427 """Send the generated response to the client.
433 """Send the generated response to the client.
428
434
429 Before this is called, ``status`` must be set and one of
435 Before this is called, ``status`` must be set and one of
430 ``setbodybytes()``, ``setbodygen()``, or ``setbodywillwrite()`` must be called.
436 ``setbodybytes()``, ``setbodygen()``, or ``setbodywillwrite()`` must be called.
431
437
432 Calling this method multiple times is not allowed.
438 Calling this method multiple times is not allowed.
433 """
439 """
434 if self._started:
440 if self._started:
435 raise error.ProgrammingError('sendresponse() called multiple times')
441 raise error.ProgrammingError('sendresponse() called multiple times')
436
442
437 self._started = True
443 self._started = True
438
444
439 if not self.status:
445 if not self.status:
440 raise error.ProgrammingError('status line not defined')
446 raise error.ProgrammingError('status line not defined')
441
447
442 if (self._bodybytes is None and self._bodygen is None
448 if (self._bodybytes is None and self._bodygen is None
443 and not self._bodywillwrite):
449 and not self._bodywillwrite):
444 raise error.ProgrammingError('response body not defined')
450 raise error.ProgrammingError('response body not defined')
445
451
446 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
452 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
447 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
453 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
448 # and SHOULD NOT generate other headers unless they could be used
454 # and SHOULD NOT generate other headers unless they could be used
449 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
455 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
450 # states that no response body can be issued. Content-Length can
456 # states that no response body can be issued. Content-Length can
451 # be sent. But if it is present, it should be the size of the response
457 # be sent. But if it is present, it should be the size of the response
452 # that wasn't transferred.
458 # that wasn't transferred.
453 if self.status.startswith('304 '):
459 if self.status.startswith('304 '):
454 # setbodybytes('') will set C-L to 0. This doesn't conform with the
460 # setbodybytes('') will set C-L to 0. This doesn't conform with the
455 # spec. So remove it.
461 # spec. So remove it.
456 if self.headers.get('Content-Length') == '0':
462 if self.headers.get('Content-Length') == '0':
457 del self.headers['Content-Length']
463 del self.headers['Content-Length']
458
464
459 # Strictly speaking, this is too strict. But until it causes
465 # Strictly speaking, this is too strict. But until it causes
460 # problems, let's be strict.
466 # problems, let's be strict.
461 badheaders = {k for k in self.headers.keys()
467 badheaders = {k for k in self.headers.keys()
462 if k.lower() not in ('date', 'etag', 'expires',
468 if k.lower() not in ('date', 'etag', 'expires',
463 'cache-control',
469 'cache-control',
464 'content-location',
470 'content-location',
465 'vary')}
471 'vary')}
466 if badheaders:
472 if badheaders:
467 raise error.ProgrammingError(
473 raise error.ProgrammingError(
468 'illegal header on 304 response: %s' %
474 'illegal header on 304 response: %s' %
469 ', '.join(sorted(badheaders)))
475 ', '.join(sorted(badheaders)))
470
476
471 if self._bodygen is not None or self._bodywillwrite:
477 if self._bodygen is not None or self._bodywillwrite:
472 raise error.ProgrammingError("must use setbodybytes('') with "
478 raise error.ProgrammingError("must use setbodybytes('') with "
473 "304 responses")
479 "304 responses")
474
480
475 # Various HTTP clients (notably httplib) won't read the HTTP response
481 # Various HTTP clients (notably httplib) won't read the HTTP response
476 # until the HTTP request has been sent in full. If servers (us) send a
482 # until the HTTP request has been sent in full. If servers (us) send a
477 # response before the HTTP request has been fully sent, the connection
483 # response before the HTTP request has been fully sent, the connection
478 # may deadlock because neither end is reading.
484 # may deadlock because neither end is reading.
479 #
485 #
480 # We work around this by "draining" the request data before
486 # We work around this by "draining" the request data before
481 # sending any response in some conditions.
487 # sending any response in some conditions.
482 drain = False
488 drain = False
483 close = False
489 close = False
484
490
485 # If the client sent Expect: 100-continue, we assume it is smart enough
491 # If the client sent Expect: 100-continue, we assume it is smart enough
486 # to deal with the server sending a response before reading the request.
492 # to deal with the server sending a response before reading the request.
487 # (httplib doesn't do this.)
493 # (httplib doesn't do this.)
488 if self._req.headers.get('Expect', '').lower() == '100-continue':
494 if self._req.headers.get('Expect', '').lower() == '100-continue':
489 pass
495 pass
490 # Only tend to request methods that have bodies. Strictly speaking,
496 # Only tend to request methods that have bodies. Strictly speaking,
491 # we should sniff for a body. But this is fine for our existing
497 # we should sniff for a body. But this is fine for our existing
492 # WSGI applications.
498 # WSGI applications.
493 elif self._req.method not in ('POST', 'PUT'):
499 elif self._req.method not in ('POST', 'PUT'):
494 pass
500 pass
495 else:
501 else:
496 # If we don't know how much data to read, there's no guarantee
502 # If we don't know how much data to read, there's no guarantee
497 # that we can drain the request responsibly. The WSGI
503 # that we can drain the request responsibly. The WSGI
498 # specification only says that servers *should* ensure the
504 # specification only says that servers *should* ensure the
499 # input stream doesn't overrun the actual request. So there's
505 # input stream doesn't overrun the actual request. So there's
500 # no guarantee that reading until EOF won't corrupt the stream
506 # no guarantee that reading until EOF won't corrupt the stream
501 # state.
507 # state.
502 if not isinstance(self._req.bodyfh, util.cappedreader):
508 if not isinstance(self._req.bodyfh, util.cappedreader):
503 close = True
509 close = True
504 else:
510 else:
505 # We /could/ only drain certain HTTP response codes. But 200 and
511 # We /could/ only drain certain HTTP response codes. But 200 and
506 # non-200 wire protocol responses both require draining. Since
512 # non-200 wire protocol responses both require draining. Since
507 # we have a capped reader in place for all situations where we
513 # we have a capped reader in place for all situations where we
508 # drain, it is safe to read from that stream. We'll either do
514 # drain, it is safe to read from that stream. We'll either do
509 # a drain or no-op if we're already at EOF.
515 # a drain or no-op if we're already at EOF.
510 drain = True
516 drain = True
511
517
512 if close:
518 if close:
513 self.headers['Connection'] = 'Close'
519 self.headers['Connection'] = 'Close'
514
520
515 if drain:
521 if drain:
516 assert isinstance(self._req.bodyfh, util.cappedreader)
522 assert isinstance(self._req.bodyfh, util.cappedreader)
517 while True:
523 while True:
518 chunk = self._req.bodyfh.read(32768)
524 chunk = self._req.bodyfh.read(32768)
519 if not chunk:
525 if not chunk:
520 break
526 break
521
527
522 strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
528 strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
523 k, v in self.headers.items()]
529 k, v in self.headers.items()]
524 write = self._startresponse(pycompat.sysstr(self.status),
530 write = self._startresponse(pycompat.sysstr(self.status),
525 strheaders)
531 strheaders)
526
532
527 if self._bodybytes:
533 if self._bodybytes:
528 yield self._bodybytes
534 yield self._bodybytes
529 elif self._bodygen:
535 elif self._bodygen:
530 for chunk in self._bodygen:
536 for chunk in self._bodygen:
531 yield chunk
537 yield chunk
532 elif self._bodywillwrite:
538 elif self._bodywillwrite:
533 self._bodywritefn = write
539 self._bodywritefn = write
534 else:
540 else:
535 error.ProgrammingError('do not know how to send body')
541 error.ProgrammingError('do not know how to send body')
536
542
def getbodyfile(self):
    """Return a file-like writer used to emit the response body.

    Two preconditions apply. ``setbodywillwrite()`` must have been
    called, and ``sendresponse()`` must have been started. Because
    ``sendresponse()`` is a generator, merely calling it does nothing:
    it has to be advanced before the write function is available. In
    this mode the generator yields no items, so the simplest way to
    consume it is ``list(res.sendresponse())``, which evaluates to
    ``[]``.

    Raises ``error.ProgrammingError`` if either precondition is unmet.
    """
    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    if not self._started:
        raise error.ProgrammingError(
            'must call sendresponse() first; did you remember to consume '
            'it since it is a generator?')

    # sendresponse() stores the WSGI write callable once it has run far
    # enough; a falsy value here means the generator was not advanced.
    writefn = self._bodywritefn
    assert writefn
    return offsettrackingwriter(writefn)
557
563
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts; a plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application directly.

    ``app_maker`` is invoked once, immediately, with no arguments. The
    returned function forwards ``(env, respond)`` to the application that
    ``app_maker`` produced and returns whatever that application returns.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        return wsgiapp(env, respond)

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now