##// END OF EJS Templates
hgweb: deduplicate code...
Manuel Jacob -
r45540:839328c5 stable
parent child Browse files
Show More
@@ -1,623 +1,623 b''
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 # import wsgiref.validate
11 # import wsgiref.validate
12
12
13 from ..thirdparty import attr
13 from ..thirdparty import attr
14 from .. import (
14 from .. import (
15 error,
15 error,
16 pycompat,
16 pycompat,
17 util,
17 util,
18 )
18 )
19
19
20
20
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.
    """

    def __init__(self):
        # Maps each key to the list of values recorded for it, in the
        # order they were added.
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key is not present.
        """
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key.

        Raises KeyError if the key is not present.
        """
        del self._items[key]

    def __contains__(self, key):
        return key in self._items

    def __len__(self):
        """Returns the number of distinct keys (not the number of values)."""
        return len(self._items)

    def get(self, key, default=None):
        """Returns the last set value for a key, or ``default`` if missing."""
        try:
            return self[key]
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key.

        Returns an empty list if the key is not defined. For a defined key
        the returned list is the internal storage, not a copy, so callers
        should not mutate it.
        """
        return self._items.get(key, [])

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            raise KeyError(b'multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Returns a new dict mapping each key to a copy of its value list."""
        return {k: list(v) for k, v in pycompat.iteritems(self._items)}
78
78
79
79
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request.

    Contains both parsed parameters as well as a handle on the input stream.

    The class is declared with ``attr.s(frozen=True)``; every field below is
    a plain ``attr.ib()`` with no default, so all of them must be supplied
    when constructing an instance.
    """

    # Request method.
    method = attr.ib()
    # Full URL for this request.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # Value of REMOTE_USER, if set, or None.
    remoteuser = attr.ib()
    # Value of REMOTE_HOST, if set, or None.
    remotehost = attr.ib()
    # Relative WSGI application path. If defined, will begin with a
    # ``/``.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch. Can be
    # ``None`` to signal no path component given to the request, an
    # empty string to signal a request to the application's root URL,
    # or a string not beginning with ``/`` containing the requested
    # path under the application.
    dispatchpath = attr.ib()
    # The name of the repository being accessed.
    reponame = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # multidict of query string parameters.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance. Operates like a dict with case
    # insensitive keys.
    headers = attr.ib()
    # Request body input stream.
    bodyfh = attr.ib()
    # WSGI environment dict, unmodified.
    rawenv = attr.ib()
127
127
128
128
def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    ``bodyfh`` can be used to specify a file object to read the request body
    from. If not defined, ``wsgi.input`` from the environment dict is used.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` if a repo name is in effect and
    ``PATH_INFO`` is missing/empty, does not start with the repo name, or
    the repo name does not end at a ``/`` boundary within ``PATH_INFO``.
    Keys such as ``wsgi.url_scheme`` and ``REQUEST_METHOD`` are read with
    plain indexing, so a missing one surfaces as ``KeyError``.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    # wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:

        def tobytes(s):
            # Non-str objects (e.g. the ``wsgi.input`` stream) pass through
            # untouched.
            if not isinstance(s, str):
                return s
            return s.encode('latin-1')

        env = {tobytes(k): tobytes(v) for k, v in pycompat.iteritems(env)}

    # Some hosting solutions are emulating hgwebdir, and dispatching directly
    # to an hgweb instance using this environment variable. This was always
    # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
    if not reponame:
        reponame = env.get(b'REPO_NAME')

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env[b'wsgi.url_scheme'] + b'://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + b'://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Append ``:<port>`` unless it is the default port for the scheme
        # that ``s`` starts with.
        if s.startswith(b'https://'):
            if port != b'443':
                s += b':' + port
        else:
            if port != b'80':
                s += b':' + port

        return s

    if env.get(b'HTTP_HOST'):
        fullurl += env[b'HTTP_HOST']
    else:
        fullurl += env[b'SERVER_NAME']
        fullurl = addport(fullurl, env[b'SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # An explicit port wins; otherwise fall back to the scheme's default
        # port, and finally to the port the server reports.
        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == b'http':
            port = b'80'
        elif altbaseurl.scheme == b'https':
            port = b'443'
        else:
            port = env[b'SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env[b'SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env[b'SERVER_PORT'])

    # Snapshot the URLs before any path component is appended.
    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))
    fullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))

    if altbaseurl:
        path = altbaseurl.path or b''
        if path and not path.startswith(b'/'):
            path = b'/' + path
        advertisedfullurl += util.urlreq.quote(path)
    else:
        advertisedfullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))

    advertisedfullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))

    if env.get(b'QUERY_STRING'):
        fullurl += b'?' + env[b'QUERY_STRING']
        advertisedfullurl += b'?' + env[b'QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if altbaseurl:
        apppath = altbaseurl.path or b''
        if apppath and not apppath.startswith(b'/'):
            apppath = b'/' + apppath
    else:
        apppath = env.get(b'SCRIPT_NAME', b'')

    if reponame:
        repoprefix = b'/' + reponame.strip(b'/')

        if not env.get(b'PATH_INFO'):
            raise error.ProgrammingError(b'reponame requires PATH_INFO')

        if not env[b'PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError(
                b'PATH_INFO does not begin with repo '
                b'name: %s (%s)' % (env[b'PATH_INFO'], reponame)
            )

        dispatchpath = env[b'PATH_INFO'][len(repoprefix) :]

        if dispatchpath and not dispatchpath.startswith(b'/'):
            raise error.ProgrammingError(
                b'reponame prefix of PATH_INFO does '
                b'not end at path delimiter: %s (%s)'
                % (env[b'PATH_INFO'], reponame)
            )

        apppath = apppath.rstrip(b'/') + repoprefix
        dispatchparts = dispatchpath.strip(b'/').split(b'/')
        dispatchpath = b'/'.join(dispatchparts)

    elif b'PATH_INFO' in env:
        if env[b'PATH_INFO'].strip(b'/'):
            dispatchparts = env[b'PATH_INFO'].strip(b'/').split(b'/')
            dispatchpath = b'/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = b''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get(b'QUERY_STRING', b'')

    # The multidict keeps the values of each key in the order they appear
    # in the query string while still allowing lookup by key.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in pycompat.iteritems(env):
        if k.startswith(b'HTTP_'):
            headers.append((k[len(b'HTTP_') :].replace(b'_', b'-'), v))

    from . import wsgiheaders  # avoid cycle

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if b'CONTENT_LENGTH' in env and b'HTTP_CONTENT_LENGTH' not in env:
        headers[b'Content-Length'] = env[b'CONTENT_LENGTH']

    if b'CONTENT_TYPE' in env and b'HTTP_CONTENT_TYPE' not in env:
        headers[b'Content-Type'] = env[b'CONTENT_TYPE']

    if bodyfh is None:
        bodyfh = env[b'wsgi.input']
        if b'Content-Length' in headers:
            # An empty Content-Length value is treated as zero.
            bodyfh = util.cappedreader(
                bodyfh, int(headers[b'Content-Length'] or b'0')
            )

    return parsedrequest(
        method=env[b'REQUEST_METHOD'],
        url=fullurl,
        baseurl=baseurl,
        advertisedurl=advertisedfullurl,
        advertisedbaseurl=advertisedbaseurl,
        urlscheme=env[b'wsgi.url_scheme'],
        remoteuser=env.get(b'REMOTE_USER'),
        remotehost=env.get(b'REMOTE_HOST'),
        apppath=apppath,
        dispatchparts=dispatchparts,
        dispatchpath=dispatchpath,
        reponame=reponame,
        querystring=querystring,
        qsparams=qsparams,
        headers=headers,
        bodyfh=bodyfh,
        rawenv=env,
    )
347
347
348
348
class offsettrackingwriter(object):
    """A file object like object that is append only and tracks write count.

    Instances are bound to a callable. This callable is called with data
    whenever a ``write()`` is attempted.

    Instances track the amount of written data so they can answer ``tell()``
    requests.

    The intent of this class is to wrap the ``write()`` function returned by
    a WSGI ``start_response()`` function. Since ``write()`` is a callable and
    not a file object, it doesn't implement other file object methods.
    """

    def __init__(self, writefn):
        """Bind to ``writefn``, the callable that receives written data."""
        self._write = writefn
        self._offset = 0

    def write(self, s):
        """Pass ``s`` to the bound callable and advance the tracked offset.

        If the callable returns a value other than ``None``, that value is
        added to the offset; otherwise ``len(s)`` is added.
        """
        res = self._write(s)
        # Some Python objects don't report the number of bytes written.
        if res is None:
            self._offset += len(s)
        else:
            self._offset += res

    def flush(self):
        """No-op; present so instances can be used where flush() is called."""
        pass

    def tell(self):
        """Return the running total accumulated by ``write()``."""
        return self._offset
380
380
381
381
382 class wsgiresponse(object):
382 class wsgiresponse(object):
383 """Represents a response to a WSGI request.
383 """Represents a response to a WSGI request.
384
384
385 A response consists of a status line, headers, and a body.
385 A response consists of a status line, headers, and a body.
386
386
387 Consumers must populate the ``status`` and ``headers`` fields and
387 Consumers must populate the ``status`` and ``headers`` fields and
388 make a call to a ``setbody*()`` method before the response can be
388 make a call to a ``setbody*()`` method before the response can be
389 issued.
389 issued.
390
390
391 When it is time to start sending the response over the wire,
391 When it is time to start sending the response over the wire,
392 ``sendresponse()`` is called. It handles emitting the header portion
392 ``sendresponse()`` is called. It handles emitting the header portion
393 of the response message. It then yields chunks of body data to be
393 of the response message. It then yields chunks of body data to be
394 written to the peer. Typically, the WSGI application itself calls
394 written to the peer. Typically, the WSGI application itself calls
395 and returns the value from ``sendresponse()``.
395 and returns the value from ``sendresponse()``.
396 """
396 """
397
397
398 def __init__(self, req, startresponse):
398 def __init__(self, req, startresponse):
399 """Create an empty response tied to a specific request.
399 """Create an empty response tied to a specific request.
400
400
401 ``req`` is a ``parsedrequest``. ``startresponse`` is the
401 ``req`` is a ``parsedrequest``. ``startresponse`` is the
402 ``start_response`` function passed to the WSGI application.
402 ``start_response`` function passed to the WSGI application.
403 """
403 """
404 self._req = req
404 self._req = req
405 self._startresponse = startresponse
405 self._startresponse = startresponse
406
406
407 self.status = None
407 self.status = None
408 from . import wsgiheaders # avoid cycle
408 from . import wsgiheaders # avoid cycle
409
409
410 self.headers = wsgiheaders.Headers([])
410 self.headers = wsgiheaders.Headers([])
411
411
412 self._bodybytes = None
412 self._bodybytes = None
413 self._bodygen = None
413 self._bodygen = None
414 self._bodywillwrite = False
414 self._bodywillwrite = False
415 self._started = False
415 self._started = False
416 self._bodywritefn = None
416 self._bodywritefn = None
417
417
418 def _verifybody(self):
418 def _verifybody(self):
419 if (
419 if (
420 self._bodybytes is not None
420 self._bodybytes is not None
421 or self._bodygen is not None
421 or self._bodygen is not None
422 or self._bodywillwrite
422 or self._bodywillwrite
423 ):
423 ):
424 raise error.ProgrammingError(b'cannot define body multiple times')
424 raise error.ProgrammingError(b'cannot define body multiple times')
425
425
426 def setbodybytes(self, b):
426 def setbodybytes(self, b):
427 """Define the response body as static bytes.
427 """Define the response body as static bytes.
428
428
429 The empty string signals that there is no response body.
429 The empty string signals that there is no response body.
430 """
430 """
431 self._verifybody()
431 self._verifybody()
432 self._bodybytes = b
432 self._bodybytes = b
433 self.headers[b'Content-Length'] = b'%d' % len(b)
433 self.headers[b'Content-Length'] = b'%d' % len(b)
434
434
435 def setbodygen(self, gen):
435 def setbodygen(self, gen):
436 """Define the response body as a generator of bytes."""
436 """Define the response body as a generator of bytes."""
437 self._verifybody()
437 self._verifybody()
438 self._bodygen = gen
438 self._bodygen = gen
439
439
440 def setbodywillwrite(self):
440 def setbodywillwrite(self):
441 """Signal an intent to use write() to emit the response body.
441 """Signal an intent to use write() to emit the response body.
442
442
443 **This is the least preferred way to send a body.**
443 **This is the least preferred way to send a body.**
444
444
445 It is preferred for WSGI applications to emit a generator of chunks
445 It is preferred for WSGI applications to emit a generator of chunks
446 constituting the response body. However, some consumers can't emit
446 constituting the response body. However, some consumers can't emit
447 data this way. So, WSGI provides a way to obtain a ``write(data)``
447 data this way. So, WSGI provides a way to obtain a ``write(data)``
448 function that can be used to synchronously perform an unbuffered
448 function that can be used to synchronously perform an unbuffered
449 write.
449 write.
450
450
451 Calling this function signals an intent to produce the body in this
451 Calling this function signals an intent to produce the body in this
452 manner.
452 manner.
453 """
453 """
454 self._verifybody()
454 self._verifybody()
455 self._bodywillwrite = True
455 self._bodywillwrite = True
456
456
def sendresponse(self):
    """Send the generated response to the client.

    Before this is called, ``status`` must be set and one of
    ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
    have been called.

    This is a generator: no work is performed (and no error is raised)
    until it is advanced. It yields the body chunks when the body was
    defined via bytes or a generator, and yields nothing when the body
    will be produced through the write function (see ``getbodyfile()``).

    Calling this method multiple times is not allowed.

    Raises ``error.ProgrammingError`` if called more than once, if the
    status line or the body is undefined, or if a 304 response carries
    disallowed headers or a body that was not set via ``setbodybytes()``.
    """
    if self._started:
        raise error.ProgrammingError(
            b'sendresponse() called multiple times'
        )

    self._started = True

    if not self.status:
        raise error.ProgrammingError(b'status line not defined')

    if (
        self._bodybytes is None
        and self._bodygen is None
        and not self._bodywillwrite
    ):
        raise error.ProgrammingError(b'response body not defined')

    # RFC 7232 Section 4.1 states that a 304 MUST generate one of
    # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
    # and SHOULD NOT generate other headers unless they could be used
    # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
    # states that no response body can be issued. Content-Length can
    # be sent. But if it is present, it should be the size of the response
    # that wasn't transferred.
    if self.status.startswith(b'304 '):
        # setbodybytes('') will set C-L to 0. This doesn't conform with the
        # spec. So remove it.
        if self.headers.get(b'Content-Length') == b'0':
            del self.headers[b'Content-Length']

        # Strictly speaking, this is too strict. But until it causes
        # problems, let's be strict.
        badheaders = {
            k
            for k in self.headers.keys()
            if k.lower()
            not in (
                b'date',
                b'etag',
                b'expires',
                b'cache-control',
                b'content-location',
                b'content-security-policy',
                b'vary',
            )
        }
        if badheaders:
            raise error.ProgrammingError(
                b'illegal header on 304 response: %s'
                % b', '.join(sorted(badheaders))
            )

        if self._bodygen is not None or self._bodywillwrite:
            raise error.ProgrammingError(
                b"must use setbodybytes('') with 304 responses"
            )

    # Various HTTP clients (notably httplib) won't read the HTTP response
    # until the HTTP request has been sent in full. If servers (us) send a
    # response before the HTTP request has been fully sent, the connection
    # may deadlock because neither end is reading.
    #
    # We work around this by "draining" the request data before
    # sending any response in some conditions.
    drain = False
    close = False

    # If the client sent Expect: 100-continue, we assume it is smart enough
    # to deal with the server sending a response before reading the request.
    # (httplib doesn't do this.)
    if self._req.headers.get(b'Expect', b'').lower() == b'100-continue':
        pass
    # Only tend to request methods that have bodies. Strictly speaking,
    # we should sniff for a body. But this is fine for our existing
    # WSGI applications.
    elif self._req.method not in (b'POST', b'PUT'):
        pass
    else:
        # If we don't know how much data to read, there's no guarantee
        # that we can drain the request responsibly. The WSGI
        # specification only says that servers *should* ensure the
        # input stream doesn't overrun the actual request. So there's
        # no guarantee that reading until EOF won't corrupt the stream
        # state.
        if not isinstance(self._req.bodyfh, util.cappedreader):
            close = True
        else:
            # We /could/ only drain certain HTTP response codes. But 200 and
            # non-200 wire protocol responses both require draining. Since
            # we have a capped reader in place for all situations where we
            # drain, it is safe to read from that stream. We'll either do
            # a drain or no-op if we're already at EOF.
            drain = True

    if close:
        self.headers[b'Connection'] = b'Close'

    if drain:
        assert isinstance(self._req.bodyfh, util.cappedreader)
        # Discard whatever is left of the request body, 32 KiB at a time.
        while True:
            chunk = self._req.bodyfh.read(32768)
            if not chunk:
                break

    # The status line and headers are stored as bytes; convert them to
    # native strings before handing them to the start-response callable.
    strheaders = [
        (pycompat.strurl(k), pycompat.strurl(v))
        for k, v in self.headers.items()
    ]
    write = self._startresponse(pycompat.sysstr(self.status), strheaders)

    if self._bodybytes:
        yield self._bodybytes
    elif self._bodygen:
        for chunk in self._bodygen:
            # PEP-3333 says that output must be bytes. And some WSGI
            # implementations enforce this. We cast bytes-like types here
            # for convenience.
            if isinstance(chunk, bytearray):
                chunk = bytes(chunk)

            yield chunk
    elif self._bodywillwrite:
        # The caller retrieves this later through getbodyfile().
        self._bodywritefn = write
    elif self._bodybytes is None and self._bodygen is None:
        # No body source was ever configured. An empty bytes body (e.g.
        # setbodybytes(b'') on a 304) or an empty body iterable is valid
        # and intentionally produces no output instead of reaching here.
        raise error.ProgrammingError(b'do not know how to send body')
590
590
def getbodyfile(self):
    """Return a file-like object used to write the response body.

    This only works after ``setbodywillwrite()`` has been called and
    ``sendresponse()`` has been called *and consumed*. ``sendresponse()``
    is a generator, so it does not run to completion unless it is
    advanced. In this mode the generator yields no items; the easiest way
    to consume it is ``list(res.sendresponse())``, which should resolve to
    an empty list - ``[]``.

    Raises ``error.ProgrammingError`` if either precondition is not met.
    """
    if not self._bodywillwrite:
        raise error.ProgrammingError(b'must call setbodywillwrite() first')

    if not self._started:
        notstarted = (
            b'must call sendresponse() first; did '
            b'you remember to consume it since it '
            b'is a generator?'
        )
        raise error.ProgrammingError(notstarted)

    # sendresponse() stores the write callable once it has run.
    writefn = self._bodywritefn
    assert writefn
    return offsettrackingwriter(writefn)
613
613
614
614
def wsgiapplication(app_maker):
    """Wrap the application built by ``app_maker`` in a WSGI callable.

    Kept for compatibility with old CGI scripts. A plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application.

    ``app_maker`` is called exactly once, immediately; the returned
    function forwards ``(env, respond)`` to the application it produced
    and returns that application's result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        result = wsgiapp(env, respond)
        return result

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now