##// END OF EJS Templates
hgweb: refactor multirequest to be a dict of lists...
Gregory Szorc -
r37012:44467a4d default
parent child Browse files
Show More
@@ -1,585 +1,558
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import wsgiref.headers as wsgiheaders
12 12 #import wsgiref.validate
13 13
14 14 from ..thirdparty import (
15 15 attr,
16 16 )
17 17 from .. import (
18 18 error,
19 19 pycompat,
20 20 util,
21 21 )
22 22
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.
    """
    def __init__(self):
        # Maps each key to the list of values recorded for it, in the order
        # the values were added.
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key is not defined.
        """
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key.

        Raises KeyError if the key is not defined.
        """
        del self._items[key]

    def __contains__(self, key):
        return key in self._items

    def __len__(self):
        """Returns the number of distinct keys (not the number of values)."""
        return len(self._items)

    def get(self, key, default=None):
        """Returns the last set value for a key, or ``default`` if unset."""
        try:
            return self[key]
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key.

        Returns a new list (empty if the key is not defined), so callers
        may mutate the result without affecting this object.
        """
        return list(self._items.get(key, []))

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            raise KeyError('multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Returns a new dict mapping each key to a copy of its value list."""
        # items() (rather than iteritems()) works on both Python 2 and 3.
        return {k: list(v) for k, v in self._items.items()}
106 79
@attr.s(frozen=True)
class parsedrequest(object):
    """Immutable record describing a parsed WSGI request.

    Holds the parsed request parameters together with a handle on the
    request's input stream.
    """

    # HTTP request method.
    method = attr.ib()
    # Complete URL of this request.
    url = attr.ib()
    # ``url`` with every path component removed: <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised variants of ``url`` and ``baseurl``. They use SERVER_NAME
    # rather than the HTTP Host header for the hostname, which is likely
    # what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # Scheme of the URL, i.e. the part before ``://`` (``http``, ``https``).
    urlscheme = attr.ib()
    # REMOTE_USER value when set, otherwise None.
    remoteuser = attr.ib()
    # REMOTE_HOST value when set, otherwise None.
    remotehost = attr.ib()
    # Relative path of the WSGI application. Begins with ``/`` when
    # defined.
    apppath = attr.ib()
    # Path parts (as a list) used for dispatch.
    dispatchparts = attr.ib()
    # Path component of the URL (without query string) used for dispatch.
    # ``None`` means the request carried no path component, the empty
    # string means the application's root URL was requested, and any other
    # value is the requested path under the application, without a
    # leading ``/``.
    dispatchpath = attr.ib()
    # Name of the repository being accessed.
    reponame = attr.ib()
    # Unparsed query string (what follows "?" in the URL).
    querystring = attr.ib()
    # Query string parameters as a multidict.
    qsparams = attr.ib()
    # Request headers as a wsgiref.headers.Headers instance. Behaves like
    # a dict whose keys are case insensitive.
    headers = attr.ib()
    # Input stream for the request body.
    bodyfh = attr.ib()
    # The WSGI environment dict, unmodified.
    rawenv = attr.ib()
154 127
def parserequestfromenv(env, reponame=None, altbaseurl=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` if ``reponame`` is given but
    ``PATH_INFO`` is missing, does not start with the repo name, or the
    repo name prefix does not end at a path delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.items()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + '://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Append ``:port`` unless it is the default port for the scheme.
        if s.startswith('https://'):
            if port != '443':
                s += ':' + port
        else:
            if port != '80':
                s += ':' + port

        return s

    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl, env['SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == 'http' and not altbaseurl.port:
            port = '80'
        elif altbaseurl.scheme == 'https' and not altbaseurl.port:
            port = '443'
        else:
            port = env['SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env['SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if altbaseurl:
        path = altbaseurl.path or ''
        if path and not path.startswith('/'):
            path = '/' + path
        advertisedfullurl += util.urlreq.quote(path)
    else:
        advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))

    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if altbaseurl:
        apppath = altbaseurl.path or ''
        if apppath and not apppath.startswith('/'):
            apppath = '/' + apppath
    else:
        apppath = env.get('SCRIPT_NAME', '')

    if reponame:
        repoprefix = '/' + reponame.strip('/')

        if not env.get('PATH_INFO'):
            raise error.ProgrammingError('reponame requires PATH_INFO')

        if not env['PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                         'name: %s (%s)' % (env['PATH_INFO'],
                                                            reponame))

        dispatchpath = env['PATH_INFO'][len(repoprefix):]

        if dispatchpath and not dispatchpath.startswith('/'):
            raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                         'not end at path delimiter: %s (%s)' %
                                         (env['PATH_INFO'], reponame))

        apppath = apppath.rstrip('/') + repoprefix
        dispatchparts = dispatchpath.strip('/').split('/')
        dispatchpath = '/'.join(dispatchparts)

    elif 'PATH_INFO' in env:
        if env['PATH_INFO'].strip('/'):
            dispatchparts = env['PATH_INFO'].strip('/').split('/')
            dispatchpath = '/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = ''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get('QUERY_STRING', '')

    # Collect query string parameters into a multidict so that a parameter
    # given more than once keeps all of its values.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.items():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    bodyfh = env['wsgi.input']
    if 'Content-Length' in headers:
        # PEP 3333 allows CONTENT_LENGTH to be present but empty. Treat an
        # empty value as a zero-length body instead of failing in int().
        bodyfh = util.cappedreader(bodyfh,
                                   int(headers['Content-Length'] or '0'))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=env['wsgi.url_scheme'],
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh,
                         rawenv=env)
347 320
class offsettrackingwriter(object):
    """A file object like object that is append only and tracks write count.

    Instances are bound to a callable. This callable is called with data
    whenever a ``write()`` is attempted.

    Instances track the amount of written data so they can answer ``tell()``
    requests.

    The intent of this class is to wrap the ``write()`` function returned by
    a WSGI ``start_response()`` function. Since ``write()`` is a callable and
    not a file object, it doesn't implement other file object methods.
    """
    def __init__(self, writefn):
        self._write = writefn
        self._offset = 0

    def write(self, s):
        """Pass ``s`` to the bound callable and advance the offset.

        Returns the number of bytes counted for this write, mirroring the
        return value of a file object's ``write()``.
        """
        res = self._write(s)
        # Some Python objects don't report the number of bytes written.
        # Assume everything was written in that case.
        written = len(s) if res is None else res
        self._offset += written
        return written

    def flush(self):
        """No-op; nothing is buffered by this object."""
        pass

    def tell(self):
        """Returns the total count accumulated by ``write()`` calls."""
        return self._offset
378 351
class wsgiresponse(object):
    """Represents a response to a WSGI request.

    A response consists of a status line, headers, and a body.

    Consumers must populate the ``status`` and ``headers`` fields and
    make a call to a ``setbody*()`` method before the response can be
    issued.

    When it is time to start sending the response over the wire,
    ``sendresponse()`` is called. It handles emitting the header portion
    of the response message. It then yields chunks of body data to be
    written to the peer. Typically, the WSGI application itself calls
    and returns the value from ``sendresponse()``.
    """

    def __init__(self, req, startresponse):
        """Create an empty response tied to a specific request.

        ``req`` is a ``parsedrequest``. ``startresponse`` is the
        ``start_response`` function passed to the WSGI application.
        """
        self._req = req
        self._startresponse = startresponse

        self.status = None
        self.headers = wsgiheaders.Headers([])

        # Exactly one of these three is populated by a ``setbody*()`` call.
        self._bodybytes = None
        self._bodygen = None
        self._bodywillwrite = False
        self._started = False
        # The ``write()`` callable returned by ``start_response()``. Only
        # recorded when the body is produced via ``setbodywillwrite()``.
        self._bodywritefn = None

    def _verifybody(self):
        """Raise ProgrammingError if a body has already been defined."""
        if (self._bodybytes is not None or self._bodygen is not None
            or self._bodywillwrite):
            raise error.ProgrammingError('cannot define body multiple times')

    def setbodybytes(self, b):
        """Define the response body as static bytes.

        The empty string signals that there is no response body.

        Also sets the ``Content-Length`` header to the length of ``b``.
        """
        self._verifybody()
        self._bodybytes = b
        self.headers['Content-Length'] = '%d' % len(b)

    def setbodygen(self, gen):
        """Define the response body as a generator of bytes."""
        self._verifybody()
        self._bodygen = gen

    def setbodywillwrite(self):
        """Signal an intent to use write() to emit the response body.

        **This is the least preferred way to send a body.**

        It is preferred for WSGI applications to emit a generator of chunks
        constituting the response body. However, some consumers can't emit
        data this way. So, WSGI provides a way to obtain a ``write(data)``
        function that can be used to synchronously perform an unbuffered
        write.

        Calling this function signals an intent to produce the body in this
        manner.
        """
        self._verifybody()
        self._bodywillwrite = True

    def sendresponse(self):
        """Send the generated response to the client.

        Before this is called, ``status`` must be set and one of
        ``setbodybytes()``, ``setbodygen()``, or ``setbodywillwrite()``
        must be called.

        Calling this method multiple times is not allowed.

        This is a generator: nothing happens until it is advanced. It
        yields the chunks of the response body, if any.
        """
        if self._started:
            raise error.ProgrammingError('sendresponse() called multiple times')

        self._started = True

        if not self.status:
            raise error.ProgrammingError('status line not defined')

        if (self._bodybytes is None and self._bodygen is None
            and not self._bodywillwrite):
            raise error.ProgrammingError('response body not defined')

        # RFC 7232 Section 4.1 states that a 304 MUST generate one of
        # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
        # and SHOULD NOT generate other headers unless they could be used
        # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
        # states that no response body can be issued. Content-Length can
        # be sent. But if it is present, it should be the size of the response
        # that wasn't transferred.
        if self.status.startswith('304 '):
            # setbodybytes('') will set C-L to 0. This doesn't conform with the
            # spec. So remove it.
            if self.headers.get('Content-Length') == '0':
                del self.headers['Content-Length']

            # Strictly speaking, this is too strict. But until it causes
            # problems, let's be strict.
            badheaders = {k for k in self.headers.keys()
                          if k.lower() not in ('date', 'etag', 'expires',
                                               'cache-control',
                                               'content-location',
                                               'vary')}
            if badheaders:
                raise error.ProgrammingError(
                    'illegal header on 304 response: %s' %
                    ', '.join(sorted(badheaders)))

            if self._bodygen is not None or self._bodywillwrite:
                raise error.ProgrammingError("must use setbodybytes('') with "
                                             "304 responses")

        # Various HTTP clients (notably httplib) won't read the HTTP response
        # until the HTTP request has been sent in full. If servers (us) send a
        # response before the HTTP request has been fully sent, the connection
        # may deadlock because neither end is reading.
        #
        # We work around this by "draining" the request data before
        # sending any response in some conditions.
        drain = False
        close = False

        # If the client sent Expect: 100-continue, we assume it is smart enough
        # to deal with the server sending a response before reading the request.
        # (httplib doesn't do this.)
        if self._req.headers.get('Expect', '').lower() == '100-continue':
            pass
        # Only tend to request methods that have bodies. Strictly speaking,
        # we should sniff for a body. But this is fine for our existing
        # WSGI applications.
        elif self._req.method not in ('POST', 'PUT'):
            pass
        else:
            # If we don't know how much data to read, there's no guarantee
            # that we can drain the request responsibly. The WSGI
            # specification only says that servers *should* ensure the
            # input stream doesn't overrun the actual request. So there's
            # no guarantee that reading until EOF won't corrupt the stream
            # state.
            if not isinstance(self._req.bodyfh, util.cappedreader):
                close = True
            else:
                # We /could/ only drain certain HTTP response codes. But 200 and
                # non-200 wire protocol responses both require draining. Since
                # we have a capped reader in place for all situations where we
                # drain, it is safe to read from that stream. We'll either do
                # a drain or no-op if we're already at EOF.
                drain = True

        if close:
            self.headers['Connection'] = 'Close'

        if drain:
            assert isinstance(self._req.bodyfh, util.cappedreader)
            while True:
                chunk = self._req.bodyfh.read(32768)
                if not chunk:
                    break

        write = self._startresponse(pycompat.sysstr(self.status),
                                    self.headers.items())

        # Dispatch on which body kind was defined, not on truthiness: an
        # empty ``setbodybytes('')`` body is valid and simply yields nothing.
        if self._bodybytes is not None:
            if self._bodybytes:
                yield self._bodybytes
        elif self._bodygen is not None:
            for chunk in self._bodygen:
                yield chunk
        elif self._bodywillwrite:
            self._bodywritefn = write
        else:
            raise error.ProgrammingError('do not know how to send body')

    def getbodyfile(self):
        """Obtain a file object like object representing the response body.

        For this to work, you must call ``setbodywillwrite()`` and then
        ``sendresponse()`` first. ``sendresponse()`` is a generator and the
        function won't run to completion unless the generator is advanced. The
        generator yields no items. The easiest way to consume it is with
        ``list(res.sendresponse())``, which should resolve to an empty list -
        ``[]``.
        """
        if not self._bodywillwrite:
            raise error.ProgrammingError('must call setbodywillwrite() first')

        if not self._started:
            raise error.ProgrammingError('must call sendresponse() first; did '
                                         'you remember to consume it since it '
                                         'is a generator?')

        assert self._bodywritefn
        return offsettrackingwriter(self._bodywritefn)
578 551
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts. A plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application.

    ``app_maker`` is called once, with no arguments, to create the
    application. The returned function forwards every ``(env, respond)``
    call to that application and returns its result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        return wsgiapp(env, respond)

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now