##// END OF EJS Templates
hgweb: remove Python 3 conditional...
Gregory Szorc -
r49760:7eebe563 default
parent child Browse files
Show More
@@ -1,634 +1,632
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9
9
10 # import wsgiref.validate
10 # import wsgiref.validate
11
11
12 from ..thirdparty import attr
12 from ..thirdparty import attr
13 from .. import (
13 from .. import (
14 encoding,
14 encoding,
15 error,
15 error,
16 pycompat,
16 pycompat,
17 util,
17 util,
18 )
18 )
19 from ..utils import (
19 from ..utils import (
20 urlutil,
20 urlutil,
21 )
21 )
22
22
23
23
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.

    Every key maps to a list of values kept in the order they were added.
    Item access (``d[key]``) and ``get()`` see only the most recently added
    value; ``getall()`` and ``getone()`` expose the full list or insist on
    there being exactly one value.
    """

    def __init__(self):
        # key -> list of values, oldest first. Lists are never left empty:
        # they are created by ``__setitem__``/``add`` and removed wholesale
        # by ``__delitem__``.
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key is not defined.
        """
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key."""
        del self._items[key]

    def __contains__(self, key):
        return key in self._items

    def __len__(self):
        """Number of distinct keys (not the number of stored values)."""
        return len(self._items)

    def get(self, key, default=None):
        """Return the last set value for a key, or ``default`` if missing."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key.

        Returns a new list (empty if the key is not defined), so callers
        may modify the result without affecting the stored values.
        """
        # Copy rather than hand out the internal list; ``asdictoflists()``
        # copies for the same reason.
        return list(self._items.get(key, ()))

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            raise KeyError(b'multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Return a plain dict mapping each key to a copy of its value list."""
        return {k: list(v) for k, v in self._items.items()}
81
81
82
82
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request.

    Contains both parsed parameters as well as a handle on the input stream.

    The class is declared frozen, so fields are set once at construction.
    Values are bytes wherever they originate from the WSGI environment.
    """

    # Request method (value of REQUEST_METHOD).
    method = attr.ib()
    # Full URL for this request, including the query string if there is one.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but built from the
    # alternate base URL when one was supplied and otherwise from SERVER_NAME
    # instead of the HTTP Host header. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # Value of REMOTE_USER, if set, or None.
    remoteuser = attr.ib()
    # Value of REMOTE_HOST, if set, or None.
    remotehost = attr.ib()
    # Relative WSGI application path. If defined, will begin with a
    # ``/``.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch. Can be
    # ``None`` to signal no path component given to the request, an
    # empty string to signal a request to the application's root URL,
    # or a string not beginning with ``/`` containing the requested
    # path under the application.
    dispatchpath = attr.ib()
    # The name of the repository being accessed.
    reponame = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # multidict of query string parameters.
    qsparams = attr.ib()
    # ``wsgiheaders.Headers`` instance holding the request headers. Operates
    # like a dict with case insensitive keys.
    headers = attr.ib()
    # Request body input stream.
    bodyfh = attr.ib()
    # WSGI environment dict. ``str`` keys and values have been converted to
    # bytes; it is otherwise unmodified.
    rawenv = attr.ib()
130
130
131
131
def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo. If ``reponame`` is not given, the
    ``REPO_NAME`` environment key is used when present.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    ``bodyfh`` can be used to specify a file object to read the request body
    from. If not defined, ``wsgi.input`` from the environment dict is used,
    capped to the request's ``Content-Length`` when that is known.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` if ``reponame`` is in effect and
    ``PATH_INFO`` is missing or empty, does not start with the repo name, or
    continues past the repo name without a ``/`` delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    # wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings.
    # The code points for the Unicode strings on Python 3 must be between
    # \00000-\000FF. We deal with bytes in Mercurial, so mass convert string
    # keys and values to bytes.
    def tobytes(s):
        # Non-str objects (e.g. the ``wsgi.input`` stream) pass through as-is.
        if not isinstance(s, str):
            return s
        if pycompat.iswindows:
            # This is what mercurial.encoding does for os.environ on
            # Windows.
            return encoding.strtolocal(s)
        else:
            # This is what is documented to be used for os.environ on Unix.
            return pycompat.fsencode(s)

    # From here on ``env`` is a new dict; the caller's mapping is not mutated.
    env = {tobytes(k): tobytes(v) for k, v in pycompat.iteritems(env)}

    # Some hosting solutions are emulating hgwebdir, and dispatching directly
    # to an hgweb instance using this environment variable. This was always
    # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
    if not reponame:
        reponame = env.get(b'REPO_NAME')

    if altbaseurl:
        altbaseurl = urlutil.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env[b'wsgi.url_scheme'] + b'://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + b'://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Append ``:<port>`` to ``s`` unless the port is the one implied by
        # the scheme prefix: 443 for ``https://``, 80 for anything else.
        if s.startswith(b'https://'):
            if port != b'443':
                s += b':' + port
        else:
            if port != b'80':
                s += b':' + port

        return s

    # A non-empty HTTP_HOST value is used verbatim, with no port appended.
    # Only the SERVER_NAME fallback gets SERVER_PORT added.
    if env.get(b'HTTP_HOST'):
        fullurl += env[b'HTTP_HOST']
    else:
        fullurl += env[b'SERVER_NAME']
        fullurl = addport(fullurl, env[b'SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # Port precedence: explicit port in ``altbaseurl``, then the default
        # for an http/https ``altbaseurl`` scheme, then SERVER_PORT. The
        # ``not altbaseurl.port`` tests in the ``elif`` branches are always
        # true at that point because the first branch handled a set port.
        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == b'http' and not altbaseurl.port:
            port = b'80'
        elif altbaseurl.scheme == b'https' and not altbaseurl.port:
            port = b'443'
        else:
            port = env[b'SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env[b'SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env[b'SERVER_PORT'])

    # Snapshot the scheme/host/port portion before any path is appended.
    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))
    fullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))

    if altbaseurl:
        # The path of ``altbaseurl`` stands in for SCRIPT_NAME; make sure a
        # non-empty path starts with ``/``.
        path = altbaseurl.path or b''
        if path and not path.startswith(b'/'):
            path = b'/' + path
        advertisedfullurl += util.urlreq.quote(path)
    else:
        advertisedfullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))

    advertisedfullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))

    # The query string is appended as received, without quoting.
    if env.get(b'QUERY_STRING'):
        fullurl += b'?' + env[b'QUERY_STRING']
        advertisedfullurl += b'?' + env[b'QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if altbaseurl:
        apppath = altbaseurl.path or b''
        if apppath and not apppath.startswith(b'/'):
            apppath = b'/' + apppath
    else:
        apppath = env.get(b'SCRIPT_NAME', b'')

    if reponame:
        repoprefix = b'/' + reponame.strip(b'/')

        if not env.get(b'PATH_INFO'):
            raise error.ProgrammingError(b'reponame requires PATH_INFO')

        if not env[b'PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError(
                b'PATH_INFO does not begin with repo '
                b'name: %s (%s)' % (env[b'PATH_INFO'], reponame)
            )

        dispatchpath = env[b'PATH_INFO'][len(repoprefix) :]

        # Reject e.g. reponame ``foo`` matching PATH_INFO ``/foobar``.
        if dispatchpath and not dispatchpath.startswith(b'/'):
            raise error.ProgrammingError(
                b'reponame prefix of PATH_INFO does '
                b'not end at path delimiter: %s (%s)'
                % (env[b'PATH_INFO'], reponame)
            )

        # Shift the repo name from the dispatch path onto the app path.
        apppath = apppath.rstrip(b'/') + repoprefix
        # NOTE(review): when PATH_INFO is exactly the repo prefix,
        # ``dispatchpath`` is empty here and splitting it yields ``[b'']``
        # rather than ``[]`` (unlike the no-reponame branch below) - confirm
        # callers expect that.
        dispatchparts = dispatchpath.strip(b'/').split(b'/')
        dispatchpath = b'/'.join(dispatchparts)

    elif b'PATH_INFO' in env:
        if env[b'PATH_INFO'].strip(b'/'):
            dispatchparts = env[b'PATH_INFO'].strip(b'/').split(b'/')
            dispatchpath = b'/'.join(dispatchparts)
        else:
            # PATH_INFO is present but empty or only slashes.
            dispatchparts = []
            dispatchpath = b''
    else:
        # No PATH_INFO at all: signalled with ``None`` rather than b''.
        dispatchparts = []
        dispatchpath = None

    querystring = env.get(b'QUERY_STRING', b'')

    # Parse into a multidict so that a key given several times keeps every
    # value, in the order received. Blank values are kept.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in pycompat.iteritems(env):
        if k.startswith(b'HTTP_'):
            headers.append((k[len(b'HTTP_') :].replace(b'_', b'-'), v))

    from . import wsgiheaders  # avoid cycle

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if b'CONTENT_LENGTH' in env and b'HTTP_CONTENT_LENGTH' not in env:
        headers[b'Content-Length'] = env[b'CONTENT_LENGTH']

    if b'CONTENT_TYPE' in env and b'HTTP_CONTENT_TYPE' not in env:
        headers[b'Content-Type'] = env[b'CONTENT_TYPE']

    if bodyfh is None:
        bodyfh = env[b'wsgi.input']
        # Limit reads of the WSGI input stream to the declared body length.
        # An empty Content-Length value is treated as 0.
        if b'Content-Length' in headers:
            bodyfh = util.cappedreader(
                bodyfh, int(headers[b'Content-Length'] or b'0')
            )

    return parsedrequest(
        method=env[b'REQUEST_METHOD'],
        url=fullurl,
        baseurl=baseurl,
        advertisedurl=advertisedfullurl,
        advertisedbaseurl=advertisedbaseurl,
        urlscheme=env[b'wsgi.url_scheme'],
        remoteuser=env.get(b'REMOTE_USER'),
        remotehost=env.get(b'REMOTE_HOST'),
        apppath=apppath,
        dispatchparts=dispatchparts,
        dispatchpath=dispatchpath,
        reponame=reponame,
        querystring=querystring,
        qsparams=qsparams,
        headers=headers,
        bodyfh=bodyfh,
        rawenv=env,
    )
358
356
359
357
class offsettrackingwriter(object):
    """A file object like object that is append only and tracks write count.

    Instances are bound to a callable. This callable is called with data
    whenever a ``write()`` is attempted.

    Instances track the amount of written data so they can answer ``tell()``
    requests.

    The intent of this class is to wrap the ``write()`` function returned by
    a WSGI ``start_response()`` function. Since ``write()`` is a callable and
    not a file object, it doesn't implement other file object methods.
    """

    def __init__(self, writefn):
        """Bind to ``writefn``, a callable accepting the data to write."""
        self._write = writefn
        # Running total of what has been written through this object.
        self._offset = 0

    def write(self, s):
        """Pass ``s`` to the bound callable and advance the offset.

        Returns the amount by which the offset advanced, mirroring the
        return value of a file object's ``write()``: the value reported by
        the callable, or ``len(s)`` when the callable returns ``None``.
        """
        res = self._write(s)
        # Some Python objects don't report the number of bytes written.
        written = len(s) if res is None else res
        self._offset += written
        return written

    def flush(self):
        """No-op; present so the object can stand in for a file object."""
        pass

    def tell(self):
        """Return the total amount written so far."""
        return self._offset
391
389
392
390
393 class wsgiresponse(object):
391 class wsgiresponse(object):
394 """Represents a response to a WSGI request.
392 """Represents a response to a WSGI request.
395
393
396 A response consists of a status line, headers, and a body.
394 A response consists of a status line, headers, and a body.
397
395
398 Consumers must populate the ``status`` and ``headers`` fields and
396 Consumers must populate the ``status`` and ``headers`` fields and
399 make a call to a ``setbody*()`` method before the response can be
397 make a call to a ``setbody*()`` method before the response can be
400 issued.
398 issued.
401
399
402 When it is time to start sending the response over the wire,
400 When it is time to start sending the response over the wire,
403 ``sendresponse()`` is called. It handles emitting the header portion
401 ``sendresponse()`` is called. It handles emitting the header portion
404 of the response message. It then yields chunks of body data to be
402 of the response message. It then yields chunks of body data to be
405 written to the peer. Typically, the WSGI application itself calls
403 written to the peer. Typically, the WSGI application itself calls
406 and returns the value from ``sendresponse()``.
404 and returns the value from ``sendresponse()``.
407 """
405 """
408
406
409 def __init__(self, req, startresponse):
407 def __init__(self, req, startresponse):
410 """Create an empty response tied to a specific request.
408 """Create an empty response tied to a specific request.
411
409
412 ``req`` is a ``parsedrequest``. ``startresponse`` is the
410 ``req`` is a ``parsedrequest``. ``startresponse`` is the
413 ``start_response`` function passed to the WSGI application.
411 ``start_response`` function passed to the WSGI application.
414 """
412 """
415 self._req = req
413 self._req = req
416 self._startresponse = startresponse
414 self._startresponse = startresponse
417
415
418 self.status = None
416 self.status = None
419 from . import wsgiheaders # avoid cycle
417 from . import wsgiheaders # avoid cycle
420
418
421 self.headers = wsgiheaders.Headers([])
419 self.headers = wsgiheaders.Headers([])
422
420
423 self._bodybytes = None
421 self._bodybytes = None
424 self._bodygen = None
422 self._bodygen = None
425 self._bodywillwrite = False
423 self._bodywillwrite = False
426 self._started = False
424 self._started = False
427 self._bodywritefn = None
425 self._bodywritefn = None
428
426
429 def _verifybody(self):
427 def _verifybody(self):
430 if (
428 if (
431 self._bodybytes is not None
429 self._bodybytes is not None
432 or self._bodygen is not None
430 or self._bodygen is not None
433 or self._bodywillwrite
431 or self._bodywillwrite
434 ):
432 ):
435 raise error.ProgrammingError(b'cannot define body multiple times')
433 raise error.ProgrammingError(b'cannot define body multiple times')
436
434
437 def setbodybytes(self, b):
435 def setbodybytes(self, b):
438 """Define the response body as static bytes.
436 """Define the response body as static bytes.
439
437
440 The empty string signals that there is no response body.
438 The empty string signals that there is no response body.
441 """
439 """
442 self._verifybody()
440 self._verifybody()
443 self._bodybytes = b
441 self._bodybytes = b
444 self.headers[b'Content-Length'] = b'%d' % len(b)
442 self.headers[b'Content-Length'] = b'%d' % len(b)
445
443
446 def setbodygen(self, gen):
444 def setbodygen(self, gen):
447 """Define the response body as a generator of bytes."""
445 """Define the response body as a generator of bytes."""
448 self._verifybody()
446 self._verifybody()
449 self._bodygen = gen
447 self._bodygen = gen
450
448
451 def setbodywillwrite(self):
449 def setbodywillwrite(self):
452 """Signal an intent to use write() to emit the response body.
450 """Signal an intent to use write() to emit the response body.
453
451
454 **This is the least preferred way to send a body.**
452 **This is the least preferred way to send a body.**
455
453
456 It is preferred for WSGI applications to emit a generator of chunks
454 It is preferred for WSGI applications to emit a generator of chunks
457 constituting the response body. However, some consumers can't emit
455 constituting the response body. However, some consumers can't emit
458 data this way. So, WSGI provides a way to obtain a ``write(data)``
456 data this way. So, WSGI provides a way to obtain a ``write(data)``
459 function that can be used to synchronously perform an unbuffered
457 function that can be used to synchronously perform an unbuffered
460 write.
458 write.
461
459
462 Calling this function signals an intent to produce the body in this
460 Calling this function signals an intent to produce the body in this
463 manner.
461 manner.
464 """
462 """
465 self._verifybody()
463 self._verifybody()
466 self._bodywillwrite = True
464 self._bodywillwrite = True
467
465
def sendresponse(self):
    """Send the generated response to the client.

    Before this is called, ``status`` must be set and one of
    ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
    have been called.

    This is a generator function: nothing below runs until the returned
    generator is advanced. It yields the chunks of the response body.
    When the body is produced via ``setbodywillwrite()`` it yields
    nothing and instead stores the WSGI ``write()`` callable on the
    instance.

    Calling this method multiple times is not allowed.

    Raises ``error.ProgrammingError`` if called more than once, if no
    status or body was defined, or if a 304 response carries headers or
    a body type that are not permitted.
    """
    if self._started:
        raise error.ProgrammingError(
            b'sendresponse() called multiple times'
        )

    self._started = True

    if not self.status:
        raise error.ProgrammingError(b'status line not defined')

    if (
        self._bodybytes is None
        and self._bodygen is None
        and not self._bodywillwrite
    ):
        raise error.ProgrammingError(b'response body not defined')

    # RFC 7232 Section 4.1 states that a 304 MUST generate one of
    # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
    # and SHOULD NOT generate other headers unless they could be used
    # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
    # states that no response body can be issued. Content-Length can
    # be sent. But if it is present, it should be the size of the response
    # that wasn't transferred.
    if self.status.startswith(b'304 '):
        # setbodybytes('') will set C-L to 0. This doesn't conform with the
        # spec. So remove it.
        if self.headers.get(b'Content-Length') == b'0':
            del self.headers[b'Content-Length']

        # Strictly speaking, this is too strict. But until it causes
        # problems, let's be strict.
        badheaders = {
            k
            for k in self.headers.keys()
            if k.lower()
            not in (
                b'date',
                b'etag',
                b'expires',
                b'cache-control',
                b'content-location',
                b'content-security-policy',
                b'vary',
            )
        }
        if badheaders:
            raise error.ProgrammingError(
                b'illegal header on 304 response: %s'
                % b', '.join(sorted(badheaders))
            )

        if self._bodygen is not None or self._bodywillwrite:
            raise error.ProgrammingError(
                b"must use setbodybytes('') with 304 responses"
            )

    # Various HTTP clients (notably httplib) won't read the HTTP response
    # until the HTTP request has been sent in full. If servers (us) send a
    # response before the HTTP request has been fully sent, the connection
    # may deadlock because neither end is reading.
    #
    # We work around this by "draining" the request data before
    # sending any response in some conditions.
    drain = False
    close = False

    # If the client sent Expect: 100-continue, we assume it is smart enough
    # to deal with the server sending a response before reading the request.
    # (httplib doesn't do this.)
    if self._req.headers.get(b'Expect', b'').lower() == b'100-continue':
        pass
    # Only tend to request methods that have bodies. Strictly speaking,
    # we should sniff for a body. But this is fine for our existing
    # WSGI applications.
    elif self._req.method not in (b'POST', b'PUT'):
        pass
    else:
        # If we don't know how much data to read, there's no guarantee
        # that we can drain the request responsibly. The WSGI
        # specification only says that servers *should* ensure the
        # input stream doesn't overrun the actual request. So there's
        # no guarantee that reading until EOF won't corrupt the stream
        # state.
        if not isinstance(self._req.bodyfh, util.cappedreader):
            close = True
        else:
            # We /could/ only drain certain HTTP response codes. But 200 and
            # non-200 wire protocol responses both require draining. Since
            # we have a capped reader in place for all situations where we
            # drain, it is safe to read from that stream. We'll either do
            # a drain or no-op if we're already at EOF.
            drain = True

    if close:
        self.headers[b'Connection'] = b'Close'

    if drain:
        assert isinstance(self._req.bodyfh, util.cappedreader)
        # Read and discard whatever is left of the request body.
        while True:
            chunk = self._req.bodyfh.read(32768)
            if not chunk:
                break

    # The header names/values and the status line are converted with the
    # pycompat helpers before being handed to the start_response callable.
    strheaders = [
        (pycompat.strurl(k), pycompat.strurl(v))
        for k, v in self.headers.items()
    ]
    write = self._startresponse(pycompat.sysstr(self.status), strheaders)

    if self._bodybytes:
        yield self._bodybytes
    elif self._bodygen:
        for chunk in self._bodygen:
            # PEP-3333 says that output must be bytes. And some WSGI
            # implementations enforce this. We cast bytes-like types here
            # for convenience.
            if isinstance(chunk, bytearray):
                chunk = bytes(chunk)

            yield chunk
    elif self._bodywillwrite:
        self._bodywritefn = write
    elif self._bodybytes is None and self._bodygen is None:
        # Defensive: the validation above should make this unreachable.
        # The exception used to be constructed without ``raise`` and was
        # therefore silently discarded.
        #
        # An *empty* body (e.g. ``setbodybytes(b'')``, which 304 responses
        # require) also falls through the truthiness tests above. That is
        # legitimate and must simply emit nothing, hence the explicit
        # ``is None`` guard instead of a bare ``else``.
        raise error.ProgrammingError(b'do not know how to send body')
601
599
def getbodyfile(self):
    """Return a file-like writer wrapping the response's ``write()`` function.

    Preconditions: ``setbodywillwrite()`` must have been called, and
    ``sendresponse()`` must have been called afterwards. Because
    ``sendresponse()`` is a generator, its body does not run until the
    generator is advanced. In this mode the generator yields no items, so
    the simplest way to drive it is ``list(res.sendresponse())``, which
    evaluates to ``[]``.

    Raises ``error.ProgrammingError`` when either precondition is not met.
    """
    if not self._bodywillwrite:
        raise error.ProgrammingError(b'must call setbodywillwrite() first')

    if not self._started:
        notstarted = (
            b'must call sendresponse() first; did '
            b'you remember to consume it since it '
            b'is a generator?'
        )
        raise error.ProgrammingError(notstarted)

    # The write function is only stored once sendresponse() has actually
    # been advanced; a missing value here means it was never consumed.
    writefn = self._bodywritefn
    assert writefn
    return offsettrackingwriter(writefn)
624
622
625
623
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts. A plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application directly.

    ``app_maker`` is called exactly once, immediately; the returned
    function forwards ``(env, respond)`` to the application it produced
    and returns that application's result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        # Plain pass-through to the application created above.
        result = wsgiapp(env, respond)
        return result

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now