##// END OF EJS Templates
hgweb: use a capped reader for WSGI input stream...
Gregory Szorc -
r36870:290fc4c3 default
parent child Browse files
Show More
@@ -1,312 +1,320
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import cgi
12 12 import errno
13 13 import socket
14 14 import wsgiref.headers as wsgiheaders
15 15 #import wsgiref.validate
16 16
17 17 from .common import (
18 18 ErrorResponse,
19 19 HTTP_NOT_MODIFIED,
20 20 statusmessage,
21 21 )
22 22
23 23 from ..thirdparty import (
24 24 attr,
25 25 )
26 26 from .. import (
27 27 pycompat,
28 28 util,
29 29 )
30 30
# Short form keys expanded by normalize(). Each shortcut maps to the list of
# (name, value) pairs that replace it; a value of None means "reuse whatever
# value the form supplied for the shortcut key itself".
shortcuts = {
    'cl': [('cmd', ['changelog']), ('rev', None)],
    'sl': [('cmd', ['shortlog']), ('rev', None)],
    'cs': [('cmd', ['changeset']), ('node', None)],
    'f': [('cmd', ['file']), ('filenode', None)],
    'fl': [('cmd', ['filelog']), ('filenode', None)],
    'fd': [('cmd', ['filediff']), ('node', None)],
    'fa': [('cmd', ['annotate']), ('filenode', None)],
    'mf': [('cmd', ['manifest']), ('manifest', None)],
    'ca': [('cmd', ['archive']), ('node', None)],
    'tags': [('cmd', ['tags'])],
    'tip': [('cmd', ['changeset']), ('node', ['tip'])],
    'static': [('cmd', ['static']), ('file', None)],
}
45 45
def normalize(form):
    """Expand shortcut keys in ``form`` and return a stripped bytes copy.

    ``form`` maps keys to lists of values. It is modified in place: every
    key found in ``shortcuts`` is replaced by the entries it stands for.
    The returned dict is new; its keys and (whitespace-stripped) values
    have been passed through ``pycompat.bytesurl``.
    """
    # Step 1: replace each shortcut key by its expansion.
    for alias in shortcuts:
        if alias not in form:
            continue
        for name, value in shortcuts[alias]:
            # None means "take the value given for the shortcut itself".
            form[name] = form[alias] if value is None else value
        del form[alias]

    # Step 2: strip every value and convert keys and values.
    return {
        pycompat.bytesurl(key): [pycompat.bytesurl(item.strip())
                                 for item in values]
        for key, values in form.iteritems()
    }
61 61
@attr.s(frozen=True)
class parsedrequest(object):
    """Immutable record of parsed WSGI request / static HTTP request
    parameters."""

    # HTTP method of the request.
    method = attr.ib()

    # Complete URL of the request.
    url = attr.ib()

    # ``url`` with every path component removed: <proto>://<host><port>.
    baseurl = attr.ib()

    # Advertised counterparts of ``url`` and ``baseurl``. The hostname comes
    # from SERVER_NAME rather than the HTTP Host header, which is likely
    # what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()

    # Path of the WSGI application.
    apppath = attr.ib()

    # Path components (as a list) that dispatch operates on.
    dispatchparts = attr.ib()

    # Path portion of the URL (query string excluded) used for dispatch.
    dispatchpath = attr.ib()

    # True when the request carried a path component at all. REPO_NAME
    # handling can leave ``dispatchpath`` empty even though this is true.
    havepathinfo = attr.ib()

    # Unparsed query string (everything after "?" in the URL).
    querystring = attr.ib()

    # Query string arguments as a list of 2-tuples.
    querystringlist = attr.ib()

    # Query string arguments as a dict; each value is a list holding at
    # least one item.
    querystringdict = attr.ib()

    # wsgiref.headers.Headers instance. Behaves like a dict whose keys are
    # case insensitive.
    headers = attr.ib()
94 94
def parserequestfromenv(env):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    ``env`` is the WSGI environment dict. Returns a ``parsedrequest``
    instance.

    Raises ``KeyError`` when a key that is read unconditionally
    (``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``,
    ``REQUEST_METHOD``) is missing. ``SCRIPT_NAME``, ``PATH_INFO``,
    ``QUERY_STRING``, ``HTTP_HOST`` and ``REPO_NAME`` are optional.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.iteritems()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.iteritems()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    scheme = env['wsgi.url_scheme']
    fullurl = scheme + '://'
    advertisedfullurl = fullurl

    def addport(s):
        # Only spell out the port when it differs from the scheme's default.
        defaultport = '443' if scheme == 'https' else '80'
        if env['SERVER_PORT'] != defaultport:
            s += ':' + env['SERVER_PORT']

        return s

    # The Host header, when present, already carries any non-default port.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    # The WSGI spec lets a server omit CGI variables whose value would be
    # the empty string, so SCRIPT_NAME must not be assumed to exist.
    scriptname = env.get('SCRIPT_NAME', '')
    quotedpath = (util.urlreq.quote(scriptname) +
                  util.urlreq.quote(env.get('PATH_INFO', '')))
    fullurl += quotedpath
    advertisedfullurl += quotedpath

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # When dispatching requests, we look at the URL components (PATH_INFO
    # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
    # has the concept of "virtual" repositories. This is defined via REPO_NAME.
    # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
    # root. We also exclude its path components from PATH_INFO when resolving
    # the dispatch path.

    apppath = scriptname

    if env.get('REPO_NAME'):
        if not apppath.endswith('/'):
            apppath += '/'

        apppath += env.get('REPO_NAME')

    if 'PATH_INFO' in env:
        dispatchparts = env['PATH_INFO'].strip('/').split('/')

        # Strip out repo parts.
        repoparts = env.get('REPO_NAME', '').split('/')
        if dispatchparts[:len(repoparts)] == repoparts:
            dispatchparts = dispatchparts[len(repoparts):]
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    querystring = env.get('QUERY_STRING', '')

    # We store as a list so we have ordering information. We also store as
    # a dict to facilitate fast lookup.
    querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)

    querystringdict = {}
    for k, v in querystringlist:
        querystringdict.setdefault(k, []).append(v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = wsgiheaders.Headers(
        [(k[len('HTTP_'):].replace('_', '-'), v)
         for k, v in env.iteritems()
         if k.startswith('HTTP_')])

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         havepathinfo='PATH_INFO' in env,
                         querystring=querystring,
                         querystringlist=querystringlist,
                         querystringdict=querystringdict,
                         headers=headers)
223 223
class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """
    def __init__(self, wsgienv, start_response):
        """Wrap a WSGI environment and its ``start_response`` callable.

        Raises ``RuntimeError`` when ``wsgi.version`` is not a 1.x version.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)
        self.inp = wsgienv[r'wsgi.input']

        # Wrap the input stream in a capped reader using the declared
        # content length. The HTTP header takes precedence over the CGI
        # variable.
        for key in (r'HTTP_CONTENT_LENGTH', r'CONTENT_LENGTH'):
            if key in wsgienv:
                self.inp = util.cappedreader(self.inp, int(wsgienv[key]))
                break

        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.form = normalize(cgi.parse(self.inp,
                                        self.env,
                                        keep_blank_values=1))
        self._start_response = start_response
        self.server_write = None
        self.headers = []

    def drain(self):
        '''need to read all data from request, httplib is half-duplex'''
        # self.env is the raw WSGI environment, so its keys are native
        # strings. Look the length up the same way __init__ does.
        length = int(self.env.get(r'HTTP_CONTENT_LENGTH') or
                     self.env.get(r'CONTENT_LENGTH') or 0)
        for s in util.filechunkiter(self.inp, limit=length):
            pass

    def respond(self, status, type, filename=None, body=None):
        """Start the response on first call and optionally write ``body``.

        ``status`` is an ``ErrorResponse``, an integer status code, or an
        already formatted status value. ``type`` is the Content-Type.
        ``filename``, if given, adds a Content-Disposition header.

        Headers are only sent the first time this is called; afterwards
        only ``body`` (if any) is written.

        Raises ``TypeError`` if a queued header value is not a ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                # Keep only the last path component and escape characters
                # that are special inside a quoted string.
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                if status.code == HTTP_NOT_MODIFIED:
                    # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
                    # it MUST NOT include any headers other than these and no
                    # body
                    self.headers = [(k, v) for (k, v) in self.headers if
                                    k in ('Date', 'ETag', 'Expires',
                                          'Cache-Control', 'Vary')]
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            # Mark the response as started so headers are sent only once.
            self._start_response = None
            self.headers = []
        if body is not None:
            self.write(body)
            self.server_write = None

    def write(self, thing):
        """Send ``thing`` to the client; empty values are skipped.

        A connection reset by the peer is ignored. Any other socket error
        propagates.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Exceptions cannot be indexed on Python 3; the ``errno``
                # attribute works on both major versions.
                if inst.errno != errno.ECONNRESET:
                    raise

    def flush(self):
        """No-op that always returns None."""
        return None
305 313
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts; a plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application directly.

    ``app_maker`` is called once, immediately. The returned callable
    forwards ``(env, respond)`` to the application it produced.
    """
    wrapped = app_maker()

    def run_wsgi(env, respond):
        result = wrapped(env, respond)
        return result

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now