##// END OF EJS Templates
hgweb: clarify that apppath begins with a forward slash...
Gregory Szorc -
r36915:e67a2e05 default
parent child Browse files
Show More
@@ -1,668 +1,669 b''
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import errno
11 import errno
12 import socket
12 import socket
13 import wsgiref.headers as wsgiheaders
13 import wsgiref.headers as wsgiheaders
14 #import wsgiref.validate
14 #import wsgiref.validate
15
15
16 from .common import (
16 from .common import (
17 ErrorResponse,
17 ErrorResponse,
18 statusmessage,
18 statusmessage,
19 )
19 )
20
20
21 from ..thirdparty import (
21 from ..thirdparty import (
22 attr,
22 attr,
23 )
23 )
24 from .. import (
24 from .. import (
25 error,
25 error,
26 pycompat,
26 pycompat,
27 util,
27 util,
28 )
28 )
29
29
30 class multidict(object):
30 class multidict(object):
31 """A dict like object that can store multiple values for a key.
31 """A dict like object that can store multiple values for a key.
32
32
33 Used to store parsed request parameters.
33 Used to store parsed request parameters.
34
34
35 This is inspired by WebOb's class of the same name.
35 This is inspired by WebOb's class of the same name.
36 """
36 """
37 def __init__(self):
37 def __init__(self):
38 # Stores (key, value) 2-tuples. This isn't the most efficient. But we
38 # Stores (key, value) 2-tuples. This isn't the most efficient. But we
39 # don't rely on parameters that much, so it shouldn't be a perf issue.
39 # don't rely on parameters that much, so it shouldn't be a perf issue.
40 # We can always add a dict for fast lookups.
40 # We can always add a dict for fast lookups.
41 self._items = []
41 self._items = []
42
42
43 def __getitem__(self, key):
43 def __getitem__(self, key):
44 """Returns the last set value for a key."""
44 """Returns the last set value for a key."""
45 for k, v in reversed(self._items):
45 for k, v in reversed(self._items):
46 if k == key:
46 if k == key:
47 return v
47 return v
48
48
49 raise KeyError(key)
49 raise KeyError(key)
50
50
51 def __setitem__(self, key, value):
51 def __setitem__(self, key, value):
52 """Replace all values for a key with a new value."""
52 """Replace all values for a key with a new value."""
53 try:
53 try:
54 del self[key]
54 del self[key]
55 except KeyError:
55 except KeyError:
56 pass
56 pass
57
57
58 self._items.append((key, value))
58 self._items.append((key, value))
59
59
60 def __delitem__(self, key):
60 def __delitem__(self, key):
61 """Delete all values for a key."""
61 """Delete all values for a key."""
62 oldlen = len(self._items)
62 oldlen = len(self._items)
63
63
64 self._items[:] = [(k, v) for k, v in self._items if k != key]
64 self._items[:] = [(k, v) for k, v in self._items if k != key]
65
65
66 if oldlen == len(self._items):
66 if oldlen == len(self._items):
67 raise KeyError(key)
67 raise KeyError(key)
68
68
69 def __contains__(self, key):
69 def __contains__(self, key):
70 return any(k == key for k, v in self._items)
70 return any(k == key for k, v in self._items)
71
71
72 def __len__(self):
72 def __len__(self):
73 return len(self._items)
73 return len(self._items)
74
74
75 def get(self, key, default=None):
75 def get(self, key, default=None):
76 try:
76 try:
77 return self.__getitem__(key)
77 return self.__getitem__(key)
78 except KeyError:
78 except KeyError:
79 return default
79 return default
80
80
81 def add(self, key, value):
81 def add(self, key, value):
82 """Add a new value for a key. Does not replace existing values."""
82 """Add a new value for a key. Does not replace existing values."""
83 self._items.append((key, value))
83 self._items.append((key, value))
84
84
85 def getall(self, key):
85 def getall(self, key):
86 """Obtains all values for a key."""
86 """Obtains all values for a key."""
87 return [v for k, v in self._items if k == key]
87 return [v for k, v in self._items if k == key]
88
88
89 def getone(self, key):
89 def getone(self, key):
90 """Obtain a single value for a key.
90 """Obtain a single value for a key.
91
91
92 Raises KeyError if key not defined or it has multiple values set.
92 Raises KeyError if key not defined or it has multiple values set.
93 """
93 """
94 vals = self.getall(key)
94 vals = self.getall(key)
95
95
96 if not vals:
96 if not vals:
97 raise KeyError(key)
97 raise KeyError(key)
98
98
99 if len(vals) > 1:
99 if len(vals) > 1:
100 raise KeyError('multiple values for %r' % key)
100 raise KeyError('multiple values for %r' % key)
101
101
102 return vals[0]
102 return vals[0]
103
103
104 def asdictoflists(self):
104 def asdictoflists(self):
105 d = {}
105 d = {}
106 for k, v in self._items:
106 for k, v in self._items:
107 if k in d:
107 if k in d:
108 d[k].append(v)
108 d[k].append(v)
109 else:
109 else:
110 d[k] = [v]
110 d[k] = [v]
111
111
112 return d
112 return d
113
113
114 @attr.s(frozen=True)
114 @attr.s(frozen=True)
115 class parsedrequest(object):
115 class parsedrequest(object):
116 """Represents a parsed WSGI request.
116 """Represents a parsed WSGI request.
117
117
118 Contains both parsed parameters as well as a handle on the input stream.
118 Contains both parsed parameters as well as a handle on the input stream.
119 """
119 """
120
120
121 # Request method.
121 # Request method.
122 method = attr.ib()
122 method = attr.ib()
123 # Full URL for this request.
123 # Full URL for this request.
124 url = attr.ib()
124 url = attr.ib()
125 # URL without any path components. Just <proto>://<host><port>.
125 # URL without any path components. Just <proto>://<host><port>.
126 baseurl = attr.ib()
126 baseurl = attr.ib()
127 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
127 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
128 # of HTTP: Host header for hostname. This is likely what clients used.
128 # of HTTP: Host header for hostname. This is likely what clients used.
129 advertisedurl = attr.ib()
129 advertisedurl = attr.ib()
130 advertisedbaseurl = attr.ib()
130 advertisedbaseurl = attr.ib()
131 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
131 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
132 urlscheme = attr.ib()
132 urlscheme = attr.ib()
133 # Value of REMOTE_USER, if set, or None.
133 # Value of REMOTE_USER, if set, or None.
134 remoteuser = attr.ib()
134 remoteuser = attr.ib()
135 # Value of REMOTE_HOST, if set, or None.
135 # Value of REMOTE_HOST, if set, or None.
136 remotehost = attr.ib()
136 remotehost = attr.ib()
137 # WSGI application path.
137 # Relative WSGI application path. If defined, will begin with a
138 # ``/``.
138 apppath = attr.ib()
139 apppath = attr.ib()
139 # List of path parts to be used for dispatch.
140 # List of path parts to be used for dispatch.
140 dispatchparts = attr.ib()
141 dispatchparts = attr.ib()
141 # URL path component (no query string) used for dispatch. Can be
142 # URL path component (no query string) used for dispatch. Can be
142 # ``None`` to signal no path component given to the request, an
143 # ``None`` to signal no path component given to the request, an
143 # empty string to signal a request to the application's root URL,
144 # empty string to signal a request to the application's root URL,
144 # or a string not beginning with ``/`` containing the requested
145 # or a string not beginning with ``/`` containing the requested
145 # path under the application.
146 # path under the application.
146 dispatchpath = attr.ib()
147 dispatchpath = attr.ib()
147 # The name of the repository being accessed.
148 # The name of the repository being accessed.
148 reponame = attr.ib()
149 reponame = attr.ib()
149 # Raw query string (part after "?" in URL).
150 # Raw query string (part after "?" in URL).
150 querystring = attr.ib()
151 querystring = attr.ib()
151 # multidict of query string parameters.
152 # multidict of query string parameters.
152 qsparams = attr.ib()
153 qsparams = attr.ib()
153 # wsgiref.headers.Headers instance. Operates like a dict with case
154 # wsgiref.headers.Headers instance. Operates like a dict with case
154 # insensitive keys.
155 # insensitive keys.
155 headers = attr.ib()
156 headers = attr.ib()
156 # Request body input stream.
157 # Request body input stream.
157 bodyfh = attr.ib()
158 bodyfh = attr.ib()
158
159
159 def parserequestfromenv(env, bodyfh, reponame=None):
160 def parserequestfromenv(env, bodyfh, reponame=None):
160 """Parse URL components from environment variables.
161 """Parse URL components from environment variables.
161
162
162 WSGI defines request attributes via environment variables. This function
163 WSGI defines request attributes via environment variables. This function
163 parses the environment variables into a data structure.
164 parses the environment variables into a data structure.
164
165
165 If ``reponame`` is defined, the leading path components matching that
166 If ``reponame`` is defined, the leading path components matching that
166 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
167 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
167 This simulates the world view of a WSGI application that processes
168 This simulates the world view of a WSGI application that processes
168 requests from the base URL of a repo.
169 requests from the base URL of a repo.
169 """
170 """
170 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
171 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
171
172
172 # We first validate that the incoming object conforms with the WSGI spec.
173 # We first validate that the incoming object conforms with the WSGI spec.
173 # We only want to be dealing with spec-conforming WSGI implementations.
174 # We only want to be dealing with spec-conforming WSGI implementations.
174 # TODO enable this once we fix internal violations.
175 # TODO enable this once we fix internal violations.
175 #wsgiref.validate.check_environ(env)
176 #wsgiref.validate.check_environ(env)
176
177
177 # PEP-0333 states that environment keys and values are native strings
178 # PEP-0333 states that environment keys and values are native strings
178 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
179 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
179 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
180 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
180 # in Mercurial, so mass convert string keys and values to bytes.
181 # in Mercurial, so mass convert string keys and values to bytes.
181 if pycompat.ispy3:
182 if pycompat.ispy3:
182 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
183 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
183 env = {k: v.encode('latin-1') if isinstance(v, str) else v
184 env = {k: v.encode('latin-1') if isinstance(v, str) else v
184 for k, v in env.iteritems()}
185 for k, v in env.iteritems()}
185
186
186 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
187 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
187 # the environment variables.
188 # the environment variables.
188 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
189 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
189 # how URLs are reconstructed.
190 # how URLs are reconstructed.
190 fullurl = env['wsgi.url_scheme'] + '://'
191 fullurl = env['wsgi.url_scheme'] + '://'
191 advertisedfullurl = fullurl
192 advertisedfullurl = fullurl
192
193
193 def addport(s):
194 def addport(s):
194 if env['wsgi.url_scheme'] == 'https':
195 if env['wsgi.url_scheme'] == 'https':
195 if env['SERVER_PORT'] != '443':
196 if env['SERVER_PORT'] != '443':
196 s += ':' + env['SERVER_PORT']
197 s += ':' + env['SERVER_PORT']
197 else:
198 else:
198 if env['SERVER_PORT'] != '80':
199 if env['SERVER_PORT'] != '80':
199 s += ':' + env['SERVER_PORT']
200 s += ':' + env['SERVER_PORT']
200
201
201 return s
202 return s
202
203
203 if env.get('HTTP_HOST'):
204 if env.get('HTTP_HOST'):
204 fullurl += env['HTTP_HOST']
205 fullurl += env['HTTP_HOST']
205 else:
206 else:
206 fullurl += env['SERVER_NAME']
207 fullurl += env['SERVER_NAME']
207 fullurl = addport(fullurl)
208 fullurl = addport(fullurl)
208
209
209 advertisedfullurl += env['SERVER_NAME']
210 advertisedfullurl += env['SERVER_NAME']
210 advertisedfullurl = addport(advertisedfullurl)
211 advertisedfullurl = addport(advertisedfullurl)
211
212
212 baseurl = fullurl
213 baseurl = fullurl
213 advertisedbaseurl = advertisedfullurl
214 advertisedbaseurl = advertisedfullurl
214
215
215 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
216 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
216 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
217 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
217 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
218 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
218 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
219 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
219
220
220 if env.get('QUERY_STRING'):
221 if env.get('QUERY_STRING'):
221 fullurl += '?' + env['QUERY_STRING']
222 fullurl += '?' + env['QUERY_STRING']
222 advertisedfullurl += '?' + env['QUERY_STRING']
223 advertisedfullurl += '?' + env['QUERY_STRING']
223
224
224 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
225 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
225 # that represents the repository being dispatched to. When computing
226 # that represents the repository being dispatched to. When computing
226 # the dispatch info, we ignore these leading path components.
227 # the dispatch info, we ignore these leading path components.
227
228
228 apppath = env.get('SCRIPT_NAME', '')
229 apppath = env.get('SCRIPT_NAME', '')
229
230
230 if reponame:
231 if reponame:
231 repoprefix = '/' + reponame.strip('/')
232 repoprefix = '/' + reponame.strip('/')
232
233
233 if not env.get('PATH_INFO'):
234 if not env.get('PATH_INFO'):
234 raise error.ProgrammingError('reponame requires PATH_INFO')
235 raise error.ProgrammingError('reponame requires PATH_INFO')
235
236
236 if not env['PATH_INFO'].startswith(repoprefix):
237 if not env['PATH_INFO'].startswith(repoprefix):
237 raise error.ProgrammingError('PATH_INFO does not begin with repo '
238 raise error.ProgrammingError('PATH_INFO does not begin with repo '
238 'name: %s (%s)' % (env['PATH_INFO'],
239 'name: %s (%s)' % (env['PATH_INFO'],
239 reponame))
240 reponame))
240
241
241 dispatchpath = env['PATH_INFO'][len(repoprefix):]
242 dispatchpath = env['PATH_INFO'][len(repoprefix):]
242
243
243 if dispatchpath and not dispatchpath.startswith('/'):
244 if dispatchpath and not dispatchpath.startswith('/'):
244 raise error.ProgrammingError('reponame prefix of PATH_INFO does '
245 raise error.ProgrammingError('reponame prefix of PATH_INFO does '
245 'not end at path delimiter: %s (%s)' %
246 'not end at path delimiter: %s (%s)' %
246 (env['PATH_INFO'], reponame))
247 (env['PATH_INFO'], reponame))
247
248
248 apppath = apppath.rstrip('/') + repoprefix
249 apppath = apppath.rstrip('/') + repoprefix
249 dispatchparts = dispatchpath.strip('/').split('/')
250 dispatchparts = dispatchpath.strip('/').split('/')
250 dispatchpath = '/'.join(dispatchparts)
251 dispatchpath = '/'.join(dispatchparts)
251
252
252 elif 'PATH_INFO' in env:
253 elif 'PATH_INFO' in env:
253 if env['PATH_INFO'].strip('/'):
254 if env['PATH_INFO'].strip('/'):
254 dispatchparts = env['PATH_INFO'].strip('/').split('/')
255 dispatchparts = env['PATH_INFO'].strip('/').split('/')
255 dispatchpath = '/'.join(dispatchparts)
256 dispatchpath = '/'.join(dispatchparts)
256 else:
257 else:
257 dispatchparts = []
258 dispatchparts = []
258 dispatchpath = ''
259 dispatchpath = ''
259 else:
260 else:
260 dispatchparts = []
261 dispatchparts = []
261 dispatchpath = None
262 dispatchpath = None
262
263
263 querystring = env.get('QUERY_STRING', '')
264 querystring = env.get('QUERY_STRING', '')
264
265
265 # We store in a multidict, which keeps (key, value) pairs in insertion
266 # We store in a multidict, which keeps (key, value) pairs in insertion
266 # order and allows multiple values for the same key.
267 # order and allows multiple values for the same key.
267 qsparams = multidict()
268 qsparams = multidict()
268 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
269 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
269 qsparams.add(k, v)
270 qsparams.add(k, v)
270
271
271 # HTTP_* keys contain HTTP request headers. The Headers structure should
272 # HTTP_* keys contain HTTP request headers. The Headers structure should
272 # perform case normalization for us. We just rewrite underscore to dash
273 # perform case normalization for us. We just rewrite underscore to dash
273 # so keys match what likely went over the wire.
274 # so keys match what likely went over the wire.
274 headers = []
275 headers = []
275 for k, v in env.iteritems():
276 for k, v in env.iteritems():
276 if k.startswith('HTTP_'):
277 if k.startswith('HTTP_'):
277 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
278 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
278
279
279 headers = wsgiheaders.Headers(headers)
280 headers = wsgiheaders.Headers(headers)
280
281
281 # This is kind of a lie because the HTTP header wasn't explicitly
282 # This is kind of a lie because the HTTP header wasn't explicitly
282 # sent. But for all intents and purposes it should be OK to lie about
283 # sent. But for all intents and purposes it should be OK to lie about
283 # this, since a consumer will use either value to determine how many
284 # this, since a consumer will use either value to determine how many
284 # bytes are available to read.
285 # bytes are available to read.
285 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
286 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
286 headers['Content-Length'] = env['CONTENT_LENGTH']
287 headers['Content-Length'] = env['CONTENT_LENGTH']
287
288
288 # TODO do this once we remove wsgirequest.inp, otherwise we could have
289 # TODO do this once we remove wsgirequest.inp, otherwise we could have
289 # multiple readers from the underlying input stream.
290 # multiple readers from the underlying input stream.
290 #bodyfh = env['wsgi.input']
291 #bodyfh = env['wsgi.input']
291 #if 'Content-Length' in headers:
292 #if 'Content-Length' in headers:
292 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
293 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
293
294
294 return parsedrequest(method=env['REQUEST_METHOD'],
295 return parsedrequest(method=env['REQUEST_METHOD'],
295 url=fullurl, baseurl=baseurl,
296 url=fullurl, baseurl=baseurl,
296 advertisedurl=advertisedfullurl,
297 advertisedurl=advertisedfullurl,
297 advertisedbaseurl=advertisedbaseurl,
298 advertisedbaseurl=advertisedbaseurl,
298 urlscheme=env['wsgi.url_scheme'],
299 urlscheme=env['wsgi.url_scheme'],
299 remoteuser=env.get('REMOTE_USER'),
300 remoteuser=env.get('REMOTE_USER'),
300 remotehost=env.get('REMOTE_HOST'),
301 remotehost=env.get('REMOTE_HOST'),
301 apppath=apppath,
302 apppath=apppath,
302 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
303 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
303 reponame=reponame,
304 reponame=reponame,
304 querystring=querystring,
305 querystring=querystring,
305 qsparams=qsparams,
306 qsparams=qsparams,
306 headers=headers,
307 headers=headers,
307 bodyfh=bodyfh)
308 bodyfh=bodyfh)
308
309
309 class offsettrackingwriter(object):
310 class offsettrackingwriter(object):
310 """A file object like object that is append only and tracks write count.
311 """A file object like object that is append only and tracks write count.
311
312
312 Instances are bound to a callable. This callable is called with data
313 Instances are bound to a callable. This callable is called with data
313 whenever a ``write()`` is attempted.
314 whenever a ``write()`` is attempted.
314
315
315 Instances track the amount of written data so they can answer ``tell()``
316 Instances track the amount of written data so they can answer ``tell()``
316 requests.
317 requests.
317
318
318 The intent of this class is to wrap the ``write()`` function returned by
319 The intent of this class is to wrap the ``write()`` function returned by
319 a WSGI ``start_response()`` function. Since ``write()`` is a callable and
320 a WSGI ``start_response()`` function. Since ``write()`` is a callable and
320 not a file object, it doesn't implement other file object methods.
321 not a file object, it doesn't implement other file object methods.
321 """
322 """
322 def __init__(self, writefn):
323 def __init__(self, writefn):
323 self._write = writefn
324 self._write = writefn
324 self._offset = 0
325 self._offset = 0
325
326
326 def write(self, s):
327 def write(self, s):
327 res = self._write(s)
328 res = self._write(s)
328 # Some Python objects don't report the number of bytes written.
329 # Some Python objects don't report the number of bytes written.
329 if res is None:
330 if res is None:
330 self._offset += len(s)
331 self._offset += len(s)
331 else:
332 else:
332 self._offset += res
333 self._offset += res
333
334
334 def flush(self):
335 def flush(self):
335 pass
336 pass
336
337
337 def tell(self):
338 def tell(self):
338 return self._offset
339 return self._offset
339
340
340 class wsgiresponse(object):
341 class wsgiresponse(object):
341 """Represents a response to a WSGI request.
342 """Represents a response to a WSGI request.
342
343
343 A response consists of a status line, headers, and a body.
344 A response consists of a status line, headers, and a body.
344
345
345 Consumers must populate the ``status`` and ``headers`` fields and
346 Consumers must populate the ``status`` and ``headers`` fields and
346 make a call to a ``setbody*()`` method before the response can be
347 make a call to a ``setbody*()`` method before the response can be
347 issued.
348 issued.
348
349
349 When it is time to start sending the response over the wire,
350 When it is time to start sending the response over the wire,
350 ``sendresponse()`` is called. It handles emitting the header portion
351 ``sendresponse()`` is called. It handles emitting the header portion
351 of the response message. It then yields chunks of body data to be
352 of the response message. It then yields chunks of body data to be
352 written to the peer. Typically, the WSGI application itself calls
353 written to the peer. Typically, the WSGI application itself calls
353 and returns the value from ``sendresponse()``.
354 and returns the value from ``sendresponse()``.
354 """
355 """
355
356
356 def __init__(self, req, startresponse):
357 def __init__(self, req, startresponse):
357 """Create an empty response tied to a specific request.
358 """Create an empty response tied to a specific request.
358
359
359 ``req`` is a ``parsedrequest``. ``startresponse`` is the
360 ``req`` is a ``parsedrequest``. ``startresponse`` is the
360 ``start_response`` function passed to the WSGI application.
361 ``start_response`` function passed to the WSGI application.
361 """
362 """
362 self._req = req
363 self._req = req
363 self._startresponse = startresponse
364 self._startresponse = startresponse
364
365
365 self.status = None
366 self.status = None
366 self.headers = wsgiheaders.Headers([])
367 self.headers = wsgiheaders.Headers([])
367
368
368 self._bodybytes = None
369 self._bodybytes = None
369 self._bodygen = None
370 self._bodygen = None
370 self._bodywillwrite = False
371 self._bodywillwrite = False
371 self._started = False
372 self._started = False
372 self._bodywritefn = None
373 self._bodywritefn = None
373
374
374 def _verifybody(self):
375 def _verifybody(self):
375 if (self._bodybytes is not None or self._bodygen is not None
376 if (self._bodybytes is not None or self._bodygen is not None
376 or self._bodywillwrite):
377 or self._bodywillwrite):
377 raise error.ProgrammingError('cannot define body multiple times')
378 raise error.ProgrammingError('cannot define body multiple times')
378
379
379 def setbodybytes(self, b):
380 def setbodybytes(self, b):
380 """Define the response body as static bytes.
381 """Define the response body as static bytes.
381
382
382 The empty string signals that there is no response body.
383 The empty string signals that there is no response body.
383 """
384 """
384 self._verifybody()
385 self._verifybody()
385 self._bodybytes = b
386 self._bodybytes = b
386 self.headers['Content-Length'] = '%d' % len(b)
387 self.headers['Content-Length'] = '%d' % len(b)
387
388
388 def setbodygen(self, gen):
389 def setbodygen(self, gen):
389 """Define the response body as a generator of bytes."""
390 """Define the response body as a generator of bytes."""
390 self._verifybody()
391 self._verifybody()
391 self._bodygen = gen
392 self._bodygen = gen
392
393
393 def setbodywillwrite(self):
394 def setbodywillwrite(self):
394 """Signal an intent to use write() to emit the response body.
395 """Signal an intent to use write() to emit the response body.
395
396
396 **This is the least preferred way to send a body.**
397 **This is the least preferred way to send a body.**
397
398
398 It is preferred for WSGI applications to emit a generator of chunks
399 It is preferred for WSGI applications to emit a generator of chunks
399 constituting the response body. However, some consumers can't emit
400 constituting the response body. However, some consumers can't emit
400 data this way. So, WSGI provides a way to obtain a ``write(data)``
401 data this way. So, WSGI provides a way to obtain a ``write(data)``
401 function that can be used to synchronously perform an unbuffered
402 function that can be used to synchronously perform an unbuffered
402 write.
403 write.
403
404
404 Calling this function signals an intent to produce the body in this
405 Calling this function signals an intent to produce the body in this
405 manner.
406 manner.
406 """
407 """
407 self._verifybody()
408 self._verifybody()
408 self._bodywillwrite = True
409 self._bodywillwrite = True
409
410
410 def sendresponse(self):
411 def sendresponse(self):
411 """Send the generated response to the client.
412 """Send the generated response to the client.
412
413
413 Before this is called, ``status`` must be set and one of
414 Before this is called, ``status`` must be set and one of
414 ``setbodybytes()``, ``setbodygen()``, or ``setbodywillwrite()`` must be called.
415 ``setbodybytes()``, ``setbodygen()``, or ``setbodywillwrite()`` must be called.
415
416
416 Calling this method multiple times is not allowed.
417 Calling this method multiple times is not allowed.
417 """
418 """
418 if self._started:
419 if self._started:
419 raise error.ProgrammingError('sendresponse() called multiple times')
420 raise error.ProgrammingError('sendresponse() called multiple times')
420
421
421 self._started = True
422 self._started = True
422
423
423 if not self.status:
424 if not self.status:
424 raise error.ProgrammingError('status line not defined')
425 raise error.ProgrammingError('status line not defined')
425
426
426 if (self._bodybytes is None and self._bodygen is None
427 if (self._bodybytes is None and self._bodygen is None
427 and not self._bodywillwrite):
428 and not self._bodywillwrite):
428 raise error.ProgrammingError('response body not defined')
429 raise error.ProgrammingError('response body not defined')
429
430
430 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
431 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
431 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
432 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
432 # and SHOULD NOT generate other headers unless they could be used
433 # and SHOULD NOT generate other headers unless they could be used
433 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
434 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
434 # states that no response body can be issued. Content-Length can
435 # states that no response body can be issued. Content-Length can
435 # be sent. But if it is present, it should be the size of the response
436 # be sent. But if it is present, it should be the size of the response
436 # that wasn't transferred.
437 # that wasn't transferred.
437 if self.status.startswith('304 '):
438 if self.status.startswith('304 '):
438 # setbodybytes('') will set C-L to 0. This doesn't conform with the
439 # setbodybytes('') will set C-L to 0. This doesn't conform with the
439 # spec. So remove it.
440 # spec. So remove it.
440 if self.headers.get('Content-Length') == '0':
441 if self.headers.get('Content-Length') == '0':
441 del self.headers['Content-Length']
442 del self.headers['Content-Length']
442
443
443 # Strictly speaking, this is too strict. But until it causes
444 # Strictly speaking, this is too strict. But until it causes
444 # problems, let's be strict.
445 # problems, let's be strict.
445 badheaders = {k for k in self.headers.keys()
446 badheaders = {k for k in self.headers.keys()
446 if k.lower() not in ('date', 'etag', 'expires',
447 if k.lower() not in ('date', 'etag', 'expires',
447 'cache-control',
448 'cache-control',
448 'content-location',
449 'content-location',
449 'vary')}
450 'vary')}
450 if badheaders:
451 if badheaders:
451 raise error.ProgrammingError(
452 raise error.ProgrammingError(
452 'illegal header on 304 response: %s' %
453 'illegal header on 304 response: %s' %
453 ', '.join(sorted(badheaders)))
454 ', '.join(sorted(badheaders)))
454
455
455 if self._bodygen is not None or self._bodywillwrite:
456 if self._bodygen is not None or self._bodywillwrite:
456 raise error.ProgrammingError("must use setbodybytes('') with "
457 raise error.ProgrammingError("must use setbodybytes('') with "
457 "304 responses")
458 "304 responses")
458
459
459 # Various HTTP clients (notably httplib) won't read the HTTP response
460 # Various HTTP clients (notably httplib) won't read the HTTP response
460 # until the HTTP request has been sent in full. If servers (us) send a
461 # until the HTTP request has been sent in full. If servers (us) send a
461 # response before the HTTP request has been fully sent, the connection
462 # response before the HTTP request has been fully sent, the connection
462 # may deadlock because neither end is reading.
463 # may deadlock because neither end is reading.
463 #
464 #
464 # We work around this by "draining" the request data before
465 # We work around this by "draining" the request data before
465 # sending any response in some conditions.
466 # sending any response in some conditions.
466 drain = False
467 drain = False
467 close = False
468 close = False
468
469
469 # If the client sent Expect: 100-continue, we assume it is smart enough
470 # If the client sent Expect: 100-continue, we assume it is smart enough
470 # to deal with the server sending a response before reading the request.
471 # to deal with the server sending a response before reading the request.
471 # (httplib doesn't do this.)
472 # (httplib doesn't do this.)
472 if self._req.headers.get('Expect', '').lower() == '100-continue':
473 if self._req.headers.get('Expect', '').lower() == '100-continue':
473 pass
474 pass
474 # Only tend to request methods that have bodies. Strictly speaking,
475 # Only tend to request methods that have bodies. Strictly speaking,
475 # we should sniff for a body. But this is fine for our existing
476 # we should sniff for a body. But this is fine for our existing
476 # WSGI applications.
477 # WSGI applications.
477 elif self._req.method not in ('POST', 'PUT'):
478 elif self._req.method not in ('POST', 'PUT'):
478 pass
479 pass
479 else:
480 else:
480 # If we don't know how much data to read, there's no guarantee
481 # If we don't know how much data to read, there's no guarantee
481 # that we can drain the request responsibly. The WSGI
482 # that we can drain the request responsibly. The WSGI
482 # specification only says that servers *should* ensure the
483 # specification only says that servers *should* ensure the
483 # input stream doesn't overrun the actual request. So there's
484 # input stream doesn't overrun the actual request. So there's
484 # no guarantee that reading until EOF won't corrupt the stream
485 # no guarantee that reading until EOF won't corrupt the stream
485 # state.
486 # state.
486 if not isinstance(self._req.bodyfh, util.cappedreader):
487 if not isinstance(self._req.bodyfh, util.cappedreader):
487 close = True
488 close = True
488 else:
489 else:
489 # We /could/ only drain certain HTTP response codes. But 200 and
490 # We /could/ only drain certain HTTP response codes. But 200 and
490 # non-200 wire protocol responses both require draining. Since
491 # non-200 wire protocol responses both require draining. Since
491 # we have a capped reader in place for all situations where we
492 # we have a capped reader in place for all situations where we
492 # drain, it is safe to read from that stream. We'll either do
493 # drain, it is safe to read from that stream. We'll either do
493 # a drain or no-op if we're already at EOF.
494 # a drain or no-op if we're already at EOF.
494 drain = True
495 drain = True
495
496
496 if close:
497 if close:
497 self.headers['Connection'] = 'Close'
498 self.headers['Connection'] = 'Close'
498
499
499 if drain:
500 if drain:
500 assert isinstance(self._req.bodyfh, util.cappedreader)
501 assert isinstance(self._req.bodyfh, util.cappedreader)
501 while True:
502 while True:
502 chunk = self._req.bodyfh.read(32768)
503 chunk = self._req.bodyfh.read(32768)
503 if not chunk:
504 if not chunk:
504 break
505 break
505
506
506 write = self._startresponse(pycompat.sysstr(self.status),
507 write = self._startresponse(pycompat.sysstr(self.status),
507 self.headers.items())
508 self.headers.items())
508
509
509 if self._bodybytes:
510 if self._bodybytes:
510 yield self._bodybytes
511 yield self._bodybytes
511 elif self._bodygen:
512 elif self._bodygen:
512 for chunk in self._bodygen:
513 for chunk in self._bodygen:
513 yield chunk
514 yield chunk
514 elif self._bodywillwrite:
515 elif self._bodywillwrite:
515 self._bodywritefn = write
516 self._bodywritefn = write
516 else:
517 else:
517 error.ProgrammingError('do not know how to send body')
518 error.ProgrammingError('do not know how to send body')
518
519
def getbodyfile(self):
    """Return a file-like writer for emitting the response body.

    This is only usable after ``setbodywillwrite()`` has been called and
    ``sendresponse()`` has been run. ``sendresponse()`` is a generator, so
    it does nothing until it is advanced; in write mode it yields no
    items. The simplest way to run it to completion is
    ``list(res.sendresponse())``, which should evaluate to ``[]``.

    Raises ``error.ProgrammingError`` if either precondition is not met.
    """
    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    if not self._started:
        msg = ('must call sendresponse() first; did '
               'you remember to consume it since it '
               'is a generator?')
        raise error.ProgrammingError(msg)

    # sendresponse() stores the server's write callable here when the
    # response is in "body will be written" mode.
    writefn = self._bodywritefn
    assert writefn
    return offsettrackingwriter(writefn)
539
540
class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """
    def __init__(self, wsgienv, start_response):
        """Wrap a WSGI environ dict and its ``start_response`` callable.

        ``wsgienv`` is the WSGI environment and ``start_response`` is the
        callable supplied by the WSGI server.

        Raises ``RuntimeError`` if ``wsgi.version`` is not a 1.x version.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)

        inp = wsgienv[r'wsgi.input']

        # When the request declares its length, wrap the input stream in a
        # capped reader sized to that length. respond() checks for this
        # wrapper to decide whether the request body can be drained.
        if r'HTTP_CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
        elif r'CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))

        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.req = parserequestfromenv(wsgienv, inp)
        self.res = wsgiresponse(self.req, start_response)
        # Reset to None by respond() once the response has been started.
        self._start_response = start_response
        # Write callable returned by start_response; set by respond().
        self.server_write = None
        # Pending (name, value) response headers.
        self.headers = []

    def respond(self, status, type, filename=None, body=None):
        """Start the HTTP response and optionally send a complete body.

        ``status`` may be an ``ErrorResponse`` (its headers are added to the
        response), an integer status code, or an already formatted status
        line.

        ``type`` is the value of the ``Content-Type`` header.

        ``filename``, if set, is advertised through an inline
        ``Content-Disposition`` header. Only the part after the last ``/``
        is used, with backslashes and double quotes escaped.

        ``body``, if not None, determines ``Content-Length`` and is written
        out once the response has been started.

        Headers are only emitted, and ``start_response`` only called, the
        first time this method runs.

        Raises ``TypeError`` if a pending header value is not a ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            # Various HTTP clients (notably httplib) won't read the HTTP
            # response until the HTTP request has been sent in full. If servers
            # (us) send a response before the HTTP request has been fully sent,
            # the connection may deadlock because neither end is reading.
            #
            # We work around this by "draining" the request data before
            # sending any response in some conditions.
            drain = False
            close = False

            # If the client sent Expect: 100-continue, we assume it is smart
            # enough to deal with the server sending a response before reading
            # the request. (httplib doesn't do this.)
            if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
                pass
            # Only tend to request methods that have bodies. Strictly speaking,
            # we should sniff for a body. But this is fine for our existing
            # WSGI applications.
            elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
                pass
            else:
                # If we don't know how much data to read, there's no guarantee
                # that we can drain the request responsibly. The WSGI
                # specification only says that servers *should* ensure the
                # input stream doesn't overrun the actual request. So there's
                # no guarantee that reading until EOF won't corrupt the stream
                # state.
                if not isinstance(self.req.bodyfh, util.cappedreader):
                    close = True
                else:
                    # We /could/ only drain certain HTTP response codes. But 200
                    # and non-200 wire protocol responses both require draining.
                    # Since we have a capped reader in place for all situations
                    # where we drain, it is safe to read from that stream. We'll
                    # either do a drain or no-op if we're already at EOF.
                    drain = True

            if close:
                self.headers.append((r'Connection', r'Close'))

            if drain:
                assert isinstance(self.req.bodyfh, util.cappedreader)
                # Read and discard whatever is left of the request body.
                while True:
                    chunk = self.req.bodyfh.read(32768)
                    if not chunk:
                        break

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            # Mark the response as started so headers are not sent again.
            self._start_response = None
            self.headers = []
        if body is not None:
            self.write(body)
            self.server_write = None

    def write(self, thing):
        """Send ``thing`` to the client; empty values are ignored.

        A connection reset by the peer is silently ignored. Any other
        ``socket.error`` propagates to the caller.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Use the errno attribute instead of indexing the exception:
                # exception instances are not subscriptable on Python 3.
                if inst.errno != errno.ECONNRESET:
                    raise

    def flush(self):
        """Do nothing; present so the object can be used like a file."""
        return None
661
662
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts; a plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application directly.

    ``app_maker`` is called once, immediately, with no arguments. The
    returned function forwards ``(env, respond)`` to the application it
    produced and returns that application's result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        result = wsgiapp(env, respond)
        return result

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now