##// END OF EJS Templates
hgweb: remove wsgirequest.__iter__...
Gregory Szorc -
r36868:e3f809e0 default
parent child Browse files
Show More
@@ -1,315 +1,312
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import cgi
11 import cgi
12 import errno
12 import errno
13 import socket
13 import socket
14 import wsgiref.headers as wsgiheaders
14 import wsgiref.headers as wsgiheaders
15 #import wsgiref.validate
15 #import wsgiref.validate
16
16
17 from .common import (
17 from .common import (
18 ErrorResponse,
18 ErrorResponse,
19 HTTP_NOT_MODIFIED,
19 HTTP_NOT_MODIFIED,
20 statusmessage,
20 statusmessage,
21 )
21 )
22
22
23 from ..thirdparty import (
23 from ..thirdparty import (
24 attr,
24 attr,
25 )
25 )
26 from .. import (
26 from .. import (
27 pycompat,
27 pycompat,
28 util,
28 util,
29 )
29 )
30
30
31 shortcuts = {
31 shortcuts = {
32 'cl': [('cmd', ['changelog']), ('rev', None)],
32 'cl': [('cmd', ['changelog']), ('rev', None)],
33 'sl': [('cmd', ['shortlog']), ('rev', None)],
33 'sl': [('cmd', ['shortlog']), ('rev', None)],
34 'cs': [('cmd', ['changeset']), ('node', None)],
34 'cs': [('cmd', ['changeset']), ('node', None)],
35 'f': [('cmd', ['file']), ('filenode', None)],
35 'f': [('cmd', ['file']), ('filenode', None)],
36 'fl': [('cmd', ['filelog']), ('filenode', None)],
36 'fl': [('cmd', ['filelog']), ('filenode', None)],
37 'fd': [('cmd', ['filediff']), ('node', None)],
37 'fd': [('cmd', ['filediff']), ('node', None)],
38 'fa': [('cmd', ['annotate']), ('filenode', None)],
38 'fa': [('cmd', ['annotate']), ('filenode', None)],
39 'mf': [('cmd', ['manifest']), ('manifest', None)],
39 'mf': [('cmd', ['manifest']), ('manifest', None)],
40 'ca': [('cmd', ['archive']), ('node', None)],
40 'ca': [('cmd', ['archive']), ('node', None)],
41 'tags': [('cmd', ['tags'])],
41 'tags': [('cmd', ['tags'])],
42 'tip': [('cmd', ['changeset']), ('node', ['tip'])],
42 'tip': [('cmd', ['changeset']), ('node', ['tip'])],
43 'static': [('cmd', ['static']), ('file', None)]
43 'static': [('cmd', ['static']), ('file', None)]
44 }
44 }
45
45
46 def normalize(form):
46 def normalize(form):
47 # first expand the shortcuts
47 # first expand the shortcuts
48 for k in shortcuts:
48 for k in shortcuts:
49 if k in form:
49 if k in form:
50 for name, value in shortcuts[k]:
50 for name, value in shortcuts[k]:
51 if value is None:
51 if value is None:
52 value = form[k]
52 value = form[k]
53 form[name] = value
53 form[name] = value
54 del form[k]
54 del form[k]
55 # And strip the values
55 # And strip the values
56 bytesform = {}
56 bytesform = {}
57 for k, v in form.iteritems():
57 for k, v in form.iteritems():
58 bytesform[pycompat.bytesurl(k)] = [
58 bytesform[pycompat.bytesurl(k)] = [
59 pycompat.bytesurl(i.strip()) for i in v]
59 pycompat.bytesurl(i.strip()) for i in v]
60 return bytesform
60 return bytesform
61
61
62 @attr.s(frozen=True)
62 @attr.s(frozen=True)
63 class parsedrequest(object):
63 class parsedrequest(object):
64 """Represents a parsed WSGI request / static HTTP request parameters."""
64 """Represents a parsed WSGI request / static HTTP request parameters."""
65
65
66 # Request method.
66 # Request method.
67 method = attr.ib()
67 method = attr.ib()
68 # Full URL for this request.
68 # Full URL for this request.
69 url = attr.ib()
69 url = attr.ib()
70 # URL without any path components. Just <proto>://<host><port>.
70 # URL without any path components. Just <proto>://<host><port>.
71 baseurl = attr.ib()
71 baseurl = attr.ib()
72 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
72 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
73 # of HTTP: Host header for hostname. This is likely what clients used.
73 # of HTTP: Host header for hostname. This is likely what clients used.
74 advertisedurl = attr.ib()
74 advertisedurl = attr.ib()
75 advertisedbaseurl = attr.ib()
75 advertisedbaseurl = attr.ib()
76 # WSGI application path.
76 # WSGI application path.
77 apppath = attr.ib()
77 apppath = attr.ib()
78 # List of path parts to be used for dispatch.
78 # List of path parts to be used for dispatch.
79 dispatchparts = attr.ib()
79 dispatchparts = attr.ib()
80 # URL path component (no query string) used for dispatch.
80 # URL path component (no query string) used for dispatch.
81 dispatchpath = attr.ib()
81 dispatchpath = attr.ib()
82 # Whether there is a path component to this request. This can be true
82 # Whether there is a path component to this request. This can be true
83 # when ``dispatchpath`` is empty due to REPO_NAME muckery.
83 # when ``dispatchpath`` is empty due to REPO_NAME muckery.
84 havepathinfo = attr.ib()
84 havepathinfo = attr.ib()
85 # Raw query string (part after "?" in URL).
85 # Raw query string (part after "?" in URL).
86 querystring = attr.ib()
86 querystring = attr.ib()
87 # List of 2-tuples of query string arguments.
87 # List of 2-tuples of query string arguments.
88 querystringlist = attr.ib()
88 querystringlist = attr.ib()
89 # Dict of query string arguments. Values are lists with at least 1 item.
89 # Dict of query string arguments. Values are lists with at least 1 item.
90 querystringdict = attr.ib()
90 querystringdict = attr.ib()
91 # wsgiref.headers.Headers instance. Operates like a dict with case
91 # wsgiref.headers.Headers instance. Operates like a dict with case
92 # insensitive keys.
92 # insensitive keys.
93 headers = attr.ib()
93 headers = attr.ib()
94
94
95 def parserequestfromenv(env):
95 def parserequestfromenv(env):
96 """Parse URL components from environment variables.
96 """Parse URL components from environment variables.
97
97
98 WSGI defines request attributes via environment variables. This function
98 WSGI defines request attributes via environment variables. This function
99 parses the environment variables into a data structure.
99 parses the environment variables into a data structure.
100 """
100 """
101 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
101 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
102
102
103 # We first validate that the incoming object conforms with the WSGI spec.
103 # We first validate that the incoming object conforms with the WSGI spec.
104 # We only want to be dealing with spec-conforming WSGI implementations.
104 # We only want to be dealing with spec-conforming WSGI implementations.
105 # TODO enable this once we fix internal violations.
105 # TODO enable this once we fix internal violations.
106 #wsgiref.validate.check_environ(env)
106 #wsgiref.validate.check_environ(env)
107
107
108 # PEP-0333 states that environment keys and values are native strings
108 # PEP-0333 states that environment keys and values are native strings
109 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
109 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
110 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
110 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
111 # in Mercurial, so mass convert string keys and values to bytes.
111 # in Mercurial, so mass convert string keys and values to bytes.
112 if pycompat.ispy3:
112 if pycompat.ispy3:
113 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
113 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
114 env = {k: v.encode('latin-1') if isinstance(v, str) else v
114 env = {k: v.encode('latin-1') if isinstance(v, str) else v
115 for k, v in env.iteritems()}
115 for k, v in env.iteritems()}
116
116
117 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
117 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
118 # the environment variables.
118 # the environment variables.
119 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
119 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
120 # how URLs are reconstructed.
120 # how URLs are reconstructed.
121 fullurl = env['wsgi.url_scheme'] + '://'
121 fullurl = env['wsgi.url_scheme'] + '://'
122 advertisedfullurl = fullurl
122 advertisedfullurl = fullurl
123
123
124 def addport(s):
124 def addport(s):
125 if env['wsgi.url_scheme'] == 'https':
125 if env['wsgi.url_scheme'] == 'https':
126 if env['SERVER_PORT'] != '443':
126 if env['SERVER_PORT'] != '443':
127 s += ':' + env['SERVER_PORT']
127 s += ':' + env['SERVER_PORT']
128 else:
128 else:
129 if env['SERVER_PORT'] != '80':
129 if env['SERVER_PORT'] != '80':
130 s += ':' + env['SERVER_PORT']
130 s += ':' + env['SERVER_PORT']
131
131
132 return s
132 return s
133
133
134 if env.get('HTTP_HOST'):
134 if env.get('HTTP_HOST'):
135 fullurl += env['HTTP_HOST']
135 fullurl += env['HTTP_HOST']
136 else:
136 else:
137 fullurl += env['SERVER_NAME']
137 fullurl += env['SERVER_NAME']
138 fullurl = addport(fullurl)
138 fullurl = addport(fullurl)
139
139
140 advertisedfullurl += env['SERVER_NAME']
140 advertisedfullurl += env['SERVER_NAME']
141 advertisedfullurl = addport(advertisedfullurl)
141 advertisedfullurl = addport(advertisedfullurl)
142
142
143 baseurl = fullurl
143 baseurl = fullurl
144 advertisedbaseurl = advertisedfullurl
144 advertisedbaseurl = advertisedfullurl
145
145
146 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
146 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
147 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
147 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
148 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
148 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
149 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
149 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
150
150
151 if env.get('QUERY_STRING'):
151 if env.get('QUERY_STRING'):
152 fullurl += '?' + env['QUERY_STRING']
152 fullurl += '?' + env['QUERY_STRING']
153 advertisedfullurl += '?' + env['QUERY_STRING']
153 advertisedfullurl += '?' + env['QUERY_STRING']
154
154
155 # When dispatching requests, we look at the URL components (PATH_INFO
155 # When dispatching requests, we look at the URL components (PATH_INFO
156 # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
156 # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
157 # has the concept of "virtual" repositories. This is defined via REPO_NAME.
157 # has the concept of "virtual" repositories. This is defined via REPO_NAME.
158 # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
158 # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
159 # root. We also exclude its path components from PATH_INFO when resolving
159 # root. We also exclude its path components from PATH_INFO when resolving
160 # the dispatch path.
160 # the dispatch path.
161
161
162 apppath = env['SCRIPT_NAME']
162 apppath = env['SCRIPT_NAME']
163
163
164 if env.get('REPO_NAME'):
164 if env.get('REPO_NAME'):
165 if not apppath.endswith('/'):
165 if not apppath.endswith('/'):
166 apppath += '/'
166 apppath += '/'
167
167
168 apppath += env.get('REPO_NAME')
168 apppath += env.get('REPO_NAME')
169
169
170 if 'PATH_INFO' in env:
170 if 'PATH_INFO' in env:
171 dispatchparts = env['PATH_INFO'].strip('/').split('/')
171 dispatchparts = env['PATH_INFO'].strip('/').split('/')
172
172
173 # Strip out repo parts.
173 # Strip out repo parts.
174 repoparts = env.get('REPO_NAME', '').split('/')
174 repoparts = env.get('REPO_NAME', '').split('/')
175 if dispatchparts[:len(repoparts)] == repoparts:
175 if dispatchparts[:len(repoparts)] == repoparts:
176 dispatchparts = dispatchparts[len(repoparts):]
176 dispatchparts = dispatchparts[len(repoparts):]
177 else:
177 else:
178 dispatchparts = []
178 dispatchparts = []
179
179
180 dispatchpath = '/'.join(dispatchparts)
180 dispatchpath = '/'.join(dispatchparts)
181
181
182 querystring = env.get('QUERY_STRING', '')
182 querystring = env.get('QUERY_STRING', '')
183
183
184 # We store as a list so we have ordering information. We also store as
184 # We store as a list so we have ordering information. We also store as
185 # a dict to facilitate fast lookup.
185 # a dict to facilitate fast lookup.
186 querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)
186 querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)
187
187
188 querystringdict = {}
188 querystringdict = {}
189 for k, v in querystringlist:
189 for k, v in querystringlist:
190 if k in querystringdict:
190 if k in querystringdict:
191 querystringdict[k].append(v)
191 querystringdict[k].append(v)
192 else:
192 else:
193 querystringdict[k] = [v]
193 querystringdict[k] = [v]
194
194
195 # HTTP_* keys contain HTTP request headers. The Headers structure should
195 # HTTP_* keys contain HTTP request headers. The Headers structure should
196 # perform case normalization for us. We just rewrite underscore to dash
196 # perform case normalization for us. We just rewrite underscore to dash
197 # so keys match what likely went over the wire.
197 # so keys match what likely went over the wire.
198 headers = []
198 headers = []
199 for k, v in env.iteritems():
199 for k, v in env.iteritems():
200 if k.startswith('HTTP_'):
200 if k.startswith('HTTP_'):
201 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
201 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
202
202
203 headers = wsgiheaders.Headers(headers)
203 headers = wsgiheaders.Headers(headers)
204
204
205 # This is kind of a lie because the HTTP header wasn't explicitly
205 # This is kind of a lie because the HTTP header wasn't explicitly
206 # sent. But for all intents and purposes it should be OK to lie about
206 # sent. But for all intents and purposes it should be OK to lie about
207 # this, since a consumer will use either value to determine how many
207 # this, since a consumer will use either value to determine how many
208 # bytes are available to read.
208 # bytes are available to read.
209 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
209 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
210 headers['Content-Length'] = env['CONTENT_LENGTH']
210 headers['Content-Length'] = env['CONTENT_LENGTH']
211
211
212 return parsedrequest(method=env['REQUEST_METHOD'],
212 return parsedrequest(method=env['REQUEST_METHOD'],
213 url=fullurl, baseurl=baseurl,
213 url=fullurl, baseurl=baseurl,
214 advertisedurl=advertisedfullurl,
214 advertisedurl=advertisedfullurl,
215 advertisedbaseurl=advertisedbaseurl,
215 advertisedbaseurl=advertisedbaseurl,
216 apppath=apppath,
216 apppath=apppath,
217 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
217 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
218 havepathinfo='PATH_INFO' in env,
218 havepathinfo='PATH_INFO' in env,
219 querystring=querystring,
219 querystring=querystring,
220 querystringlist=querystringlist,
220 querystringlist=querystringlist,
221 querystringdict=querystringdict,
221 querystringdict=querystringdict,
222 headers=headers)
222 headers=headers)
223
223
224 class wsgirequest(object):
224 class wsgirequest(object):
225 """Higher-level API for a WSGI request.
225 """Higher-level API for a WSGI request.
226
226
227 WSGI applications are invoked with 2 arguments. They are used to
227 WSGI applications are invoked with 2 arguments. They are used to
228 instantiate instances of this class, which provides higher-level APIs
228 instantiate instances of this class, which provides higher-level APIs
229 for obtaining request parameters, writing HTTP output, etc.
229 for obtaining request parameters, writing HTTP output, etc.
230 """
230 """
231 def __init__(self, wsgienv, start_response):
231 def __init__(self, wsgienv, start_response):
232 version = wsgienv[r'wsgi.version']
232 version = wsgienv[r'wsgi.version']
233 if (version < (1, 0)) or (version >= (2, 0)):
233 if (version < (1, 0)) or (version >= (2, 0)):
234 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
234 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
235 % version)
235 % version)
236 self.inp = wsgienv[r'wsgi.input']
236 self.inp = wsgienv[r'wsgi.input']
237 self.err = wsgienv[r'wsgi.errors']
237 self.err = wsgienv[r'wsgi.errors']
238 self.threaded = wsgienv[r'wsgi.multithread']
238 self.threaded = wsgienv[r'wsgi.multithread']
239 self.multiprocess = wsgienv[r'wsgi.multiprocess']
239 self.multiprocess = wsgienv[r'wsgi.multiprocess']
240 self.run_once = wsgienv[r'wsgi.run_once']
240 self.run_once = wsgienv[r'wsgi.run_once']
241 self.env = wsgienv
241 self.env = wsgienv
242 self.form = normalize(cgi.parse(self.inp,
242 self.form = normalize(cgi.parse(self.inp,
243 self.env,
243 self.env,
244 keep_blank_values=1))
244 keep_blank_values=1))
245 self._start_response = start_response
245 self._start_response = start_response
246 self.server_write = None
246 self.server_write = None
247 self.headers = []
247 self.headers = []
248
248
249 def __iter__(self):
250 return iter([])
251
252 def drain(self):
249 def drain(self):
253 '''need to read all data from request, httplib is half-duplex'''
250 '''need to read all data from request, httplib is half-duplex'''
254 length = int(self.env.get('CONTENT_LENGTH') or 0)
251 length = int(self.env.get('CONTENT_LENGTH') or 0)
255 for s in util.filechunkiter(self.inp, limit=length):
252 for s in util.filechunkiter(self.inp, limit=length):
256 pass
253 pass
257
254
258 def respond(self, status, type, filename=None, body=None):
255 def respond(self, status, type, filename=None, body=None):
259 if not isinstance(type, str):
256 if not isinstance(type, str):
260 type = pycompat.sysstr(type)
257 type = pycompat.sysstr(type)
261 if self._start_response is not None:
258 if self._start_response is not None:
262 self.headers.append((r'Content-Type', type))
259 self.headers.append((r'Content-Type', type))
263 if filename:
260 if filename:
264 filename = (filename.rpartition('/')[-1]
261 filename = (filename.rpartition('/')[-1]
265 .replace('\\', '\\\\').replace('"', '\\"'))
262 .replace('\\', '\\\\').replace('"', '\\"'))
266 self.headers.append(('Content-Disposition',
263 self.headers.append(('Content-Disposition',
267 'inline; filename="%s"' % filename))
264 'inline; filename="%s"' % filename))
268 if body is not None:
265 if body is not None:
269 self.headers.append((r'Content-Length', str(len(body))))
266 self.headers.append((r'Content-Length', str(len(body))))
270
267
271 for k, v in self.headers:
268 for k, v in self.headers:
272 if not isinstance(v, str):
269 if not isinstance(v, str):
273 raise TypeError('header value must be string: %r' % (v,))
270 raise TypeError('header value must be string: %r' % (v,))
274
271
275 if isinstance(status, ErrorResponse):
272 if isinstance(status, ErrorResponse):
276 self.headers.extend(status.headers)
273 self.headers.extend(status.headers)
277 if status.code == HTTP_NOT_MODIFIED:
274 if status.code == HTTP_NOT_MODIFIED:
278 # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
275 # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
279 # it MUST NOT include any headers other than these and no
276 # it MUST NOT include any headers other than these and no
280 # body
277 # body
281 self.headers = [(k, v) for (k, v) in self.headers if
278 self.headers = [(k, v) for (k, v) in self.headers if
282 k in ('Date', 'ETag', 'Expires',
279 k in ('Date', 'ETag', 'Expires',
283 'Cache-Control', 'Vary')]
280 'Cache-Control', 'Vary')]
284 status = statusmessage(status.code, pycompat.bytestr(status))
281 status = statusmessage(status.code, pycompat.bytestr(status))
285 elif status == 200:
282 elif status == 200:
286 status = '200 Script output follows'
283 status = '200 Script output follows'
287 elif isinstance(status, int):
284 elif isinstance(status, int):
288 status = statusmessage(status)
285 status = statusmessage(status)
289
286
290 self.server_write = self._start_response(
287 self.server_write = self._start_response(
291 pycompat.sysstr(status), self.headers)
288 pycompat.sysstr(status), self.headers)
292 self._start_response = None
289 self._start_response = None
293 self.headers = []
290 self.headers = []
294 if body is not None:
291 if body is not None:
295 self.write(body)
292 self.write(body)
296 self.server_write = None
293 self.server_write = None
297
294
298 def write(self, thing):
295 def write(self, thing):
299 if thing:
296 if thing:
300 try:
297 try:
301 self.server_write(thing)
298 self.server_write(thing)
302 except socket.error as inst:
299 except socket.error as inst:
303 if inst[0] != errno.ECONNRESET:
300 if inst[0] != errno.ECONNRESET:
304 raise
301 raise
305
302
306 def flush(self):
303 def flush(self):
307 return None
304 return None
308
305
309 def wsgiapplication(app_maker):
306 def wsgiapplication(app_maker):
310 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
307 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
311 can and should now be used as a WSGI application.'''
308 can and should now be used as a WSGI application.'''
312 application = app_maker()
309 application = app_maker()
313 def run_wsgi(env, respond):
310 def run_wsgi(env, respond):
314 return application(env, respond)
311 return application(env, respond)
315 return run_wsgi
312 return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now