##// END OF EJS Templates
hgweb: encode WSGI environment like OS environment...
Manuel Jacob -
r45541:2632c1ed stable
parent child Browse files
Show More
@@ -1,623 +1,632 b''
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 # import wsgiref.validate
12 12
13 13 from ..thirdparty import attr
14 14 from .. import (
15 encoding,
15 16 error,
16 17 pycompat,
17 18 util,
18 19 )
19 20
20 21
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    Values are kept per key in insertion order. Item access
    (``d[key]``) returns the most recently added value, while
    ``getall()`` and ``getone()`` expose the full list or enforce that
    there is exactly one value.

    This is inspired by WebOb's class of the same name.
    """

    def __init__(self):
        # Maps each key to the list of values recorded for it.
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key is not defined.
        """
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key."""
        del self._items[key]

    def __contains__(self, key):
        return key in self._items

    def __len__(self):
        """Number of distinct keys (not the number of values)."""
        return len(self._items)

    def get(self, key, default=None):
        """Return the last set value for a key, or ``default`` if missing."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key.

        Returns a new list (empty if the key is not defined), so callers
        may modify the result without affecting the stored values.
        """
        return list(self._items.get(key, []))

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            raise KeyError(b'multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Return a plain dict mapping each key to a copy of its values."""
        return {k: list(v) for k, v in pycompat.iteritems(self._items)}
78 79
79 80
@attr.s(frozen=True)
class parsedrequest(object):
    """Immutable record describing one parsed WSGI request.

    Holds the URL pieces and parameters derived from the WSGI environment
    together with a handle on the request body stream.
    """

    # HTTP request method (value of REQUEST_METHOD).
    method = attr.ib()
    # Complete URL of the request, including path and query string.
    url = attr.ib()
    # ``url`` stripped of every path component: <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised counterparts of ``url`` and ``baseurl``. They take the
    # hostname from SERVER_NAME rather than the HTTP Host header, which is
    # likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # Scheme portion of the URL (text before ``://``), e.g. ``http`` or
    # ``https``.
    urlscheme = attr.ib()
    # REMOTE_USER from the environment, or None when absent.
    remoteuser = attr.ib()
    # REMOTE_HOST from the environment, or None when absent.
    remotehost = attr.ib()
    # Path of the WSGI application relative to the server root. Starts
    # with ``/`` when defined.
    apppath = attr.ib()
    # Path components (as a list) that dispatching operates on.
    dispatchparts = attr.ib()
    # Path (without query string) that dispatching operates on. ``None``
    # means the request carried no path component at all, an empty string
    # means the application's root URL was requested, and any other value
    # is the requested path under the application without a leading ``/``.
    dispatchpath = attr.ib()
    # Name of the repository this request targets.
    reponame = attr.ib()
    # Unparsed query string (everything after "?" in the URL).
    querystring = attr.ib()
    # Query string parameters as a multidict.
    qsparams = attr.ib()
    # Request headers as a wsgiref.headers.Headers-like instance whose
    # keys are case insensitive.
    headers = attr.ib()
    # File object from which the request body is read.
    bodyfh = attr.ib()
    # The WSGI environment dict the request was parsed from. On Python 3
    # parserequestfromenv() passes the copy whose str keys and values have
    # been converted to bytes.
    rawenv = attr.ib()
127 128
128 129
def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    ``bodyfh`` can be used to specify a file object to read the request body
    from. If not defined, ``wsgi.input`` from the environment dict is used.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` if ``reponame`` is in effect but
    ``PATH_INFO`` is missing/empty, does not start with the repo name, or
    the repo name prefix does not end at a path delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    # wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes. The
    # conversion mirrors how the OS environment is encoded on each platform.
    if pycompat.ispy3:

        def tobytes(s):
            # Non-str values (e.g. the wsgi.input file object or the
            # wsgi.version tuple) are passed through untouched.
            if not isinstance(s, str):
                return s
            if pycompat.iswindows:
                # This is what mercurial.encoding does for os.environ on
                # Windows.
                return encoding.strtolocal(s)
            else:
                # This is what is documented to be used for os.environ on Unix.
                return pycompat.fsencode(s)

        env = {tobytes(k): tobytes(v) for k, v in pycompat.iteritems(env)}

    # Some hosting solutions are emulating hgwebdir, and dispatching directly
    # to an hgweb instance using this environment variable. This was always
    # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
    if not reponame:
        reponame = env.get(b'REPO_NAME')

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env[b'wsgi.url_scheme'] + b'://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + b'://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Append ":<port>" to ``s`` unless ``port`` is the default for the
        # scheme ``s`` starts with (443 for https, 80 for anything else).
        if s.startswith(b'https://'):
            if port != b'443':
                s += b':' + port
        else:
            if port != b'80':
                s += b':' + port

        return s

    # The Host header value is used verbatim when present; no port is
    # appended to it here.
    if env.get(b'HTTP_HOST'):
        fullurl += env[b'HTTP_HOST']
    else:
        fullurl += env[b'SERVER_NAME']
        fullurl = addport(fullurl, env[b'SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # Prefer an explicit port from altbaseurl, then the default port of
        # its scheme, and only then the port the server is listening on.
        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == b'http' and not altbaseurl.port:
            port = b'80'
        elif altbaseurl.scheme == b'https' and not altbaseurl.port:
            port = b'443'
        else:
            port = env[b'SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env[b'SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env[b'SERVER_PORT'])

    # Snapshot the URLs before any path component is appended.
    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))
    fullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))

    if altbaseurl:
        # The path of altbaseurl stands in for SCRIPT_NAME.
        path = altbaseurl.path or b''
        if path and not path.startswith(b'/'):
            path = b'/' + path
        advertisedfullurl += util.urlreq.quote(path)
    else:
        advertisedfullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))

    advertisedfullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))

    if env.get(b'QUERY_STRING'):
        fullurl += b'?' + env[b'QUERY_STRING']
        advertisedfullurl += b'?' + env[b'QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if altbaseurl:
        apppath = altbaseurl.path or b''
        if apppath and not apppath.startswith(b'/'):
            apppath = b'/' + apppath
    else:
        apppath = env.get(b'SCRIPT_NAME', b'')

    if reponame:
        repoprefix = b'/' + reponame.strip(b'/')

        if not env.get(b'PATH_INFO'):
            raise error.ProgrammingError(b'reponame requires PATH_INFO')

        if not env[b'PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError(
                b'PATH_INFO does not begin with repo '
                b'name: %s (%s)' % (env[b'PATH_INFO'], reponame)
            )

        dispatchpath = env[b'PATH_INFO'][len(repoprefix) :]

        # Reject e.g. reponame ``repo`` matching PATH_INFO ``/repoextra``.
        if dispatchpath and not dispatchpath.startswith(b'/'):
            raise error.ProgrammingError(
                b'reponame prefix of PATH_INFO does '
                b'not end at path delimiter: %s (%s)'
                % (env[b'PATH_INFO'], reponame)
            )

        # Move the repo name from the dispatch path onto the app path.
        apppath = apppath.rstrip(b'/') + repoprefix
        # NOTE(review): when PATH_INFO is exactly the repo prefix, this
        # yields ``[b'']`` rather than ``[]`` as in the branches below --
        # confirm whether consumers rely on that.
        dispatchparts = dispatchpath.strip(b'/').split(b'/')
        dispatchpath = b'/'.join(dispatchparts)

    elif b'PATH_INFO' in env:
        if env[b'PATH_INFO'].strip(b'/'):
            dispatchparts = env[b'PATH_INFO'].strip(b'/').split(b'/')
            dispatchpath = b'/'.join(dispatchparts)
        else:
            # PATH_INFO present but empty (or only slashes): root URL.
            dispatchparts = []
            dispatchpath = b''
    else:
        # No PATH_INFO at all: signal "no path component" with None.
        dispatchparts = []
        dispatchpath = None

    querystring = env.get(b'QUERY_STRING', b'')

    # We store as a list so we have ordering information. We also store as
    # a dict to facilitate fast lookup.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in pycompat.iteritems(env):
        if k.startswith(b'HTTP_'):
            headers.append((k[len(b'HTTP_') :].replace(b'_', b'-'), v))

    from . import wsgiheaders  # avoid cycle

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if b'CONTENT_LENGTH' in env and b'HTTP_CONTENT_LENGTH' not in env:
        headers[b'Content-Length'] = env[b'CONTENT_LENGTH']

    if b'CONTENT_TYPE' in env and b'HTTP_CONTENT_TYPE' not in env:
        headers[b'Content-Type'] = env[b'CONTENT_TYPE']

    if bodyfh is None:
        bodyfh = env[b'wsgi.input']
        # Cap reads at the declared body size. An empty Content-Length
        # value is treated as 0.
        if b'Content-Length' in headers:
            bodyfh = util.cappedreader(
                bodyfh, int(headers[b'Content-Length'] or b'0')
            )

    return parsedrequest(
        method=env[b'REQUEST_METHOD'],
        url=fullurl,
        baseurl=baseurl,
        advertisedurl=advertisedfullurl,
        advertisedbaseurl=advertisedbaseurl,
        urlscheme=env[b'wsgi.url_scheme'],
        remoteuser=env.get(b'REMOTE_USER'),
        remotehost=env.get(b'REMOTE_HOST'),
        apppath=apppath,
        dispatchparts=dispatchparts,
        dispatchpath=dispatchpath,
        reponame=reponame,
        querystring=querystring,
        qsparams=qsparams,
        headers=headers,
        bodyfh=bodyfh,
        rawenv=env,
    )
347 356
348 357
class offsettrackingwriter(object):
    """A file object like object that is append only and tracks write count.

    Instances are bound to a callable. This callable is called with data
    whenever a ``write()`` is attempted.

    Instances track the amount of written data so they can answer ``tell()``
    requests.

    The intent of this class is to wrap the ``write()`` function returned by
    a WSGI ``start_response()`` function. Since ``write()`` is a callable and
    not a file object, it doesn't implement other file object methods.
    """

    def __init__(self, writefn):
        """``writefn`` is the callable invoked with each chunk of data."""
        self._write = writefn
        self._offset = 0

    def write(self, s):
        """Pass ``s`` to the wrapped callable and advance the offset.

        Returns the number of bytes accounted for, like file objects do:
        the value reported by the wrapped callable, or ``len(s)`` when
        the callable returns ``None``.
        """
        res = self._write(s)
        # Some Python objects don't report the number of bytes written.
        written = len(s) if res is None else res
        self._offset += written
        return written

    def flush(self):
        """No-op; present for file object compatibility."""
        pass

    def tell(self):
        """Return the total number of bytes written so far."""
        return self._offset
380 389
381 390
class wsgiresponse(object):
    """Represents a response to a WSGI request.

    A response consists of a status line, headers, and a body.

    Consumers must populate the ``status`` and ``headers`` fields and
    make a call to a ``setbody*()`` method before the response can be
    issued.

    When it is time to start sending the response over the wire,
    ``sendresponse()`` is called. It handles emitting the header portion
    of the response message. It then yields chunks of body data to be
    written to the peer. Typically, the WSGI application itself calls
    and returns the value from ``sendresponse()``.
    """

    def __init__(self, req, startresponse):
        """Create an empty response tied to a specific request.

        ``req`` is a ``parsedrequest``. ``startresponse`` is the
        ``start_response`` function passed to the WSGI application.
        """
        self._req = req
        self._startresponse = startresponse

        self.status = None
        from . import wsgiheaders  # avoid cycle

        self.headers = wsgiheaders.Headers([])

        # Exactly one of the following three is populated by a setbody*()
        # call; _verifybody() enforces that.
        self._bodybytes = None
        self._bodygen = None
        self._bodywillwrite = False
        self._started = False
        # write() callable returned by start_response(); only retained
        # when the body is produced via setbodywillwrite().
        self._bodywritefn = None

    def _verifybody(self):
        """Raise ProgrammingError if a body has already been defined."""
        if (
            self._bodybytes is not None
            or self._bodygen is not None
            or self._bodywillwrite
        ):
            raise error.ProgrammingError(b'cannot define body multiple times')

    def setbodybytes(self, b):
        """Define the response body as static bytes.

        The empty string signals that there is no response body.

        Also sets the ``Content-Length`` header to the length of ``b``.
        """
        self._verifybody()
        self._bodybytes = b
        self.headers[b'Content-Length'] = b'%d' % len(b)

    def setbodygen(self, gen):
        """Define the response body as a generator of bytes."""
        self._verifybody()
        self._bodygen = gen

    def setbodywillwrite(self):
        """Signal an intent to use write() to emit the response body.

        **This is the least preferred way to send a body.**

        It is preferred for WSGI applications to emit a generator of chunks
        constituting the response body. However, some consumers can't emit
        data this way. So, WSGI provides a way to obtain a ``write(data)``
        function that can be used to synchronously perform an unbuffered
        write.

        Calling this function signals an intent to produce the body in this
        manner.
        """
        self._verifybody()
        self._bodywillwrite = True

    def sendresponse(self):
        """Send the generated response to the client.

        Before this is called, ``status`` must be set and one of
        ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
        be called.

        This is a generator: nothing happens until it is iterated. It
        yields the chunks of the response body (nothing for an empty
        ``setbodybytes()`` body or for a ``setbodywillwrite()`` body).

        Calling this method multiple times is not allowed.

        Raises ``error.ProgrammingError`` if called more than once, if the
        status or body is not defined, or if a 304 response carries a
        disallowed header or a non-bytes body.
        """
        if self._started:
            raise error.ProgrammingError(
                b'sendresponse() called multiple times'
            )

        self._started = True

        if not self.status:
            raise error.ProgrammingError(b'status line not defined')

        if (
            self._bodybytes is None
            and self._bodygen is None
            and not self._bodywillwrite
        ):
            raise error.ProgrammingError(b'response body not defined')

        # RFC 7232 Section 4.1 states that a 304 MUST generate one of
        # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
        # and SHOULD NOT generate other headers unless they could be used
        # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
        # states that no response body can be issued. Content-Length can
        # be sent. But if it is present, it should be the size of the response
        # that wasn't transferred.
        if self.status.startswith(b'304 '):
            # setbodybytes('') will set C-L to 0. This doesn't conform with the
            # spec. So remove it.
            if self.headers.get(b'Content-Length') == b'0':
                del self.headers[b'Content-Length']

            # Strictly speaking, this is too strict. But until it causes
            # problems, let's be strict.
            badheaders = {
                k
                for k in self.headers.keys()
                if k.lower()
                not in (
                    b'date',
                    b'etag',
                    b'expires',
                    b'cache-control',
                    b'content-location',
                    b'content-security-policy',
                    b'vary',
                )
            }
            if badheaders:
                raise error.ProgrammingError(
                    b'illegal header on 304 response: %s'
                    % b', '.join(sorted(badheaders))
                )

            if self._bodygen is not None or self._bodywillwrite:
                raise error.ProgrammingError(
                    b"must use setbodybytes('') with 304 responses"
                )

        # Various HTTP clients (notably httplib) won't read the HTTP response
        # until the HTTP request has been sent in full. If servers (us) send a
        # response before the HTTP request has been fully sent, the connection
        # may deadlock because neither end is reading.
        #
        # We work around this by "draining" the request data before
        # sending any response in some conditions.
        drain = False
        close = False

        # If the client sent Expect: 100-continue, we assume it is smart enough
        # to deal with the server sending a response before reading the request.
        # (httplib doesn't do this.)
        if self._req.headers.get(b'Expect', b'').lower() == b'100-continue':
            pass
        # Only tend to request methods that have bodies. Strictly speaking,
        # we should sniff for a body. But this is fine for our existing
        # WSGI applications.
        elif self._req.method not in (b'POST', b'PUT'):
            pass
        else:
            # If we don't know how much data to read, there's no guarantee
            # that we can drain the request responsibly. The WSGI
            # specification only says that servers *should* ensure the
            # input stream doesn't overrun the actual request. So there's
            # no guarantee that reading until EOF won't corrupt the stream
            # state.
            if not isinstance(self._req.bodyfh, util.cappedreader):
                close = True
            else:
                # We /could/ only drain certain HTTP response codes. But 200 and
                # non-200 wire protocol responses both require draining. Since
                # we have a capped reader in place for all situations where we
                # drain, it is safe to read from that stream. We'll either do
                # a drain or no-op if we're already at EOF.
                drain = True

        if close:
            self.headers[b'Connection'] = b'Close'

        if drain:
            assert isinstance(self._req.bodyfh, util.cappedreader)
            while True:
                chunk = self._req.bodyfh.read(32768)
                if not chunk:
                    break

        strheaders = [
            (pycompat.strurl(k), pycompat.strurl(v))
            for k, v in self.headers.items()
        ]
        write = self._startresponse(pycompat.sysstr(self.status), strheaders)

        # Dispatch on which setbody*() was used. The checks are against
        # None (not truthiness) so that an empty body - as used by 304
        # responses - is recognized as a bytes body instead of falling
        # through to the error branch.
        if self._bodybytes is not None:
            # An empty body produces no chunks at all.
            if self._bodybytes:
                yield self._bodybytes
        elif self._bodygen is not None:
            for chunk in self._bodygen:
                # PEP-3333 says that output must be bytes. And some WSGI
                # implementations enforce this. We cast bytes-like types here
                # for convenience.
                if isinstance(chunk, bytearray):
                    chunk = bytes(chunk)

                yield chunk
        elif self._bodywillwrite:
            self._bodywritefn = write
        else:
            raise error.ProgrammingError(b'do not know how to send body')

    def getbodyfile(self):
        """Obtain a file object like object representing the response body.

        For this to work, you must call ``setbodywillwrite()`` and then
        ``sendresponse()`` first. ``sendresponse()`` is a generator and the
        function won't run to completion unless the generator is advanced. The
        generator yields no items. The easiest way to consume it is with
        ``list(res.sendresponse())``, which should resolve to an empty list -
        ``[]``.
        """
        if not self._bodywillwrite:
            raise error.ProgrammingError(b'must call setbodywillwrite() first')

        if not self._started:
            raise error.ProgrammingError(
                b'must call sendresponse() first; did '
                b'you remember to consume it since it '
                b'is a generator?'
            )

        assert self._bodywritefn
        return offsettrackingwriter(self._bodywritefn)
613 622
614 623
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts. A plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application.

    ``app_maker`` is called once, immediately; the returned function
    forwards every ``(env, respond)`` call to the application it created.
    """
    wrapped = app_maker()

    def run_wsgi(env, respond):
        result = wrapped(env, respond)
        return result

    return run_wsgi
@@ -1,439 +1,451 b''
1 1 from __future__ import absolute_import, print_function
2 2
3 3 import unittest
4 4
5 5 from mercurial.hgweb import request as requestmod
6 from mercurial import error
6 from mercurial import error, pycompat
7 7
# Baseline WSGI environment shared by the tests. Individual tests layer
# additional or overriding keys on top of it via ``parse(..., extra=...)``.
DEFAULT_ENV = {
    'REQUEST_METHOD': 'GET',
    'SERVER_NAME': 'testserver',
    'SERVER_PORT': '80',
    'SERVER_PROTOCOL': 'http',
    'wsgi.version': (1, 0),
    'wsgi.url_scheme': 'http',
    'wsgi.input': None,
    'wsgi.errors': None,
    'wsgi.multithread': False,
    'wsgi.multiprocess': True,
    'wsgi.run_once': False,
}
21 21
22 22
def parse(env, reponame=None, altbaseurl=None, extra=None):
    """Run ``parserequestfromenv()`` on a copy of ``env``.

    ``extra`` optionally supplies keys to add to (or override in) the
    copy; the ``env`` passed in is never modified.
    """
    merged = dict(env)
    if extra:
        merged.update(extra)

    parsed = requestmod.parserequestfromenv(
        merged, reponame=reponame, altbaseurl=altbaseurl
    )
    return parsed
30 30
31 31
32 32 class ParseRequestTests(unittest.TestCase):
33 33 def testdefault(self):
34 34 r = parse(DEFAULT_ENV)
35 35 self.assertEqual(r.url, b'http://testserver')
36 36 self.assertEqual(r.baseurl, b'http://testserver')
37 37 self.assertEqual(r.advertisedurl, b'http://testserver')
38 38 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
39 39 self.assertEqual(r.urlscheme, b'http')
40 40 self.assertEqual(r.method, b'GET')
41 41 self.assertIsNone(r.remoteuser)
42 42 self.assertIsNone(r.remotehost)
43 43 self.assertEqual(r.apppath, b'')
44 44 self.assertEqual(r.dispatchparts, [])
45 45 self.assertIsNone(r.dispatchpath)
46 46 self.assertIsNone(r.reponame)
47 47 self.assertEqual(r.querystring, b'')
48 48 self.assertEqual(len(r.qsparams), 0)
49 49 self.assertEqual(len(r.headers), 0)
50 50
51 51 def testcustomport(self):
52 52 r = parse(DEFAULT_ENV, extra={'SERVER_PORT': '8000',})
53 53
54 54 self.assertEqual(r.url, b'http://testserver:8000')
55 55 self.assertEqual(r.baseurl, b'http://testserver:8000')
56 56 self.assertEqual(r.advertisedurl, b'http://testserver:8000')
57 57 self.assertEqual(r.advertisedbaseurl, b'http://testserver:8000')
58 58
59 59 r = parse(
60 60 DEFAULT_ENV,
61 61 extra={'SERVER_PORT': '4000', 'wsgi.url_scheme': 'https',},
62 62 )
63 63
64 64 self.assertEqual(r.url, b'https://testserver:4000')
65 65 self.assertEqual(r.baseurl, b'https://testserver:4000')
66 66 self.assertEqual(r.advertisedurl, b'https://testserver:4000')
67 67 self.assertEqual(r.advertisedbaseurl, b'https://testserver:4000')
68 68
69 69 def testhttphost(self):
70 70 r = parse(DEFAULT_ENV, extra={'HTTP_HOST': 'altserver',})
71 71
72 72 self.assertEqual(r.url, b'http://altserver')
73 73 self.assertEqual(r.baseurl, b'http://altserver')
74 74 self.assertEqual(r.advertisedurl, b'http://testserver')
75 75 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
76 76
77 77 def testscriptname(self):
78 78 r = parse(DEFAULT_ENV, extra={'SCRIPT_NAME': '',})
79 79
80 80 self.assertEqual(r.url, b'http://testserver')
81 81 self.assertEqual(r.baseurl, b'http://testserver')
82 82 self.assertEqual(r.advertisedurl, b'http://testserver')
83 83 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
84 84 self.assertEqual(r.apppath, b'')
85 85 self.assertEqual(r.dispatchparts, [])
86 86 self.assertIsNone(r.dispatchpath)
87 87
88 88 r = parse(DEFAULT_ENV, extra={'SCRIPT_NAME': '/script',})
89 89
90 90 self.assertEqual(r.url, b'http://testserver/script')
91 91 self.assertEqual(r.baseurl, b'http://testserver')
92 92 self.assertEqual(r.advertisedurl, b'http://testserver/script')
93 93 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
94 94 self.assertEqual(r.apppath, b'/script')
95 95 self.assertEqual(r.dispatchparts, [])
96 96 self.assertIsNone(r.dispatchpath)
97 97
98 98 r = parse(DEFAULT_ENV, extra={'SCRIPT_NAME': '/multiple words',})
99 99
100 100 self.assertEqual(r.url, b'http://testserver/multiple%20words')
101 101 self.assertEqual(r.baseurl, b'http://testserver')
102 102 self.assertEqual(r.advertisedurl, b'http://testserver/multiple%20words')
103 103 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
104 104 self.assertEqual(r.apppath, b'/multiple words')
105 105 self.assertEqual(r.dispatchparts, [])
106 106 self.assertIsNone(r.dispatchpath)
107 107
108 108 def testpathinfo(self):
109 109 r = parse(DEFAULT_ENV, extra={'PATH_INFO': '',})
110 110
111 111 self.assertEqual(r.url, b'http://testserver')
112 112 self.assertEqual(r.baseurl, b'http://testserver')
113 113 self.assertEqual(r.advertisedurl, b'http://testserver')
114 114 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
115 115 self.assertEqual(r.apppath, b'')
116 116 self.assertEqual(r.dispatchparts, [])
117 117 self.assertEqual(r.dispatchpath, b'')
118 118
119 119 r = parse(DEFAULT_ENV, extra={'PATH_INFO': '/pathinfo',})
120 120
121 121 self.assertEqual(r.url, b'http://testserver/pathinfo')
122 122 self.assertEqual(r.baseurl, b'http://testserver')
123 123 self.assertEqual(r.advertisedurl, b'http://testserver/pathinfo')
124 124 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
125 125 self.assertEqual(r.apppath, b'')
126 126 self.assertEqual(r.dispatchparts, [b'pathinfo'])
127 127 self.assertEqual(r.dispatchpath, b'pathinfo')
128 128
129 129 r = parse(DEFAULT_ENV, extra={'PATH_INFO': '/one/two/',})
130 130
131 131 self.assertEqual(r.url, b'http://testserver/one/two/')
132 132 self.assertEqual(r.baseurl, b'http://testserver')
133 133 self.assertEqual(r.advertisedurl, b'http://testserver/one/two/')
134 134 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
135 135 self.assertEqual(r.apppath, b'')
136 136 self.assertEqual(r.dispatchparts, [b'one', b'two'])
137 137 self.assertEqual(r.dispatchpath, b'one/two')
138 138
139 139 def testscriptandpathinfo(self):
140 140 r = parse(
141 141 DEFAULT_ENV,
142 142 extra={'SCRIPT_NAME': '/script', 'PATH_INFO': '/pathinfo',},
143 143 )
144 144
145 145 self.assertEqual(r.url, b'http://testserver/script/pathinfo')
146 146 self.assertEqual(r.baseurl, b'http://testserver')
147 147 self.assertEqual(r.advertisedurl, b'http://testserver/script/pathinfo')
148 148 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
149 149 self.assertEqual(r.apppath, b'/script')
150 150 self.assertEqual(r.dispatchparts, [b'pathinfo'])
151 151 self.assertEqual(r.dispatchpath, b'pathinfo')
152 152
153 153 r = parse(
154 154 DEFAULT_ENV,
155 155 extra={
156 156 'SCRIPT_NAME': '/script1/script2',
157 157 'PATH_INFO': '/path1/path2',
158 158 },
159 159 )
160 160
161 161 self.assertEqual(
162 162 r.url, b'http://testserver/script1/script2/path1/path2'
163 163 )
164 164 self.assertEqual(r.baseurl, b'http://testserver')
165 165 self.assertEqual(
166 166 r.advertisedurl, b'http://testserver/script1/script2/path1/path2'
167 167 )
168 168 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
169 169 self.assertEqual(r.apppath, b'/script1/script2')
170 170 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
171 171 self.assertEqual(r.dispatchpath, b'path1/path2')
172 172
173 173 r = parse(
174 174 DEFAULT_ENV,
175 175 extra={
176 176 'HTTP_HOST': 'hostserver',
177 177 'SCRIPT_NAME': '/script',
178 178 'PATH_INFO': '/pathinfo',
179 179 },
180 180 )
181 181
182 182 self.assertEqual(r.url, b'http://hostserver/script/pathinfo')
183 183 self.assertEqual(r.baseurl, b'http://hostserver')
184 184 self.assertEqual(r.advertisedurl, b'http://testserver/script/pathinfo')
185 185 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
186 186 self.assertEqual(r.apppath, b'/script')
187 187 self.assertEqual(r.dispatchparts, [b'pathinfo'])
188 188 self.assertEqual(r.dispatchpath, b'pathinfo')
189 189
190 190 if not getattr(unittest.TestCase, 'assertRaisesRegex', False):
191 191 # Python 3.7 deprecates the regex*p* version, but 2.7 lacks
192 192 # the regex version.
193 193 assertRaisesRegex = ( # camelcase-required
194 194 unittest.TestCase.assertRaisesRegexp
195 195 )
196 196
def testreponame(self):
    """Repository name components are split off the front of PATH_INFO."""

    raises = self.assertRaisesRegex

    # A reponame without any PATH_INFO is a programming error.
    with raises(error.ProgrammingError, 'reponame requires PATH_INFO'):
        parse(DEFAULT_ENV, reponame=b'repo')

    # PATH_INFO has to start with the repository name.
    with raises(
        error.ProgrammingError, 'PATH_INFO does not begin with repo name'
    ):
        parse(
            DEFAULT_ENV, reponame=b'repo', extra={'PATH_INFO': '/pathinfo'}
        )

    # A first path component that merely starts with the repository name
    # ('/repoextra') is rejected as well.
    with raises(error.ProgrammingError, 'reponame prefix of PATH_INFO'):
        parse(
            DEFAULT_ENV,
            reponame=b'repo',
            extra={'PATH_INFO': '/repoextra/path'},
        )

    # Single-component repository name.
    req = parse(
        DEFAULT_ENV,
        reponame=b'repo',
        extra={'PATH_INFO': '/repo/path1/path2'},
    )

    self.assertEqual(req.url, b'http://testserver/repo/path1/path2')
    self.assertEqual(req.baseurl, b'http://testserver')
    self.assertEqual(
        req.advertisedurl, b'http://testserver/repo/path1/path2'
    )
    self.assertEqual(req.advertisedbaseurl, b'http://testserver')
    self.assertEqual(req.apppath, b'/repo')
    self.assertEqual(req.dispatchparts, [b'path1', b'path2'])
    self.assertEqual(req.dispatchpath, b'path1/path2')
    self.assertEqual(req.reponame, b'repo')

    # Repository name that itself contains a slash.
    req = parse(
        DEFAULT_ENV,
        reponame=b'prefix/repo',
        extra={'PATH_INFO': '/prefix/repo/path1/path2'},
    )

    self.assertEqual(
        req.url, b'http://testserver/prefix/repo/path1/path2'
    )
    self.assertEqual(req.baseurl, b'http://testserver')
    self.assertEqual(
        req.advertisedurl, b'http://testserver/prefix/repo/path1/path2'
    )
    self.assertEqual(req.advertisedbaseurl, b'http://testserver')
    self.assertEqual(req.apppath, b'/prefix/repo')
    self.assertEqual(req.dispatchparts, [b'path1', b'path2'])
    self.assertEqual(req.dispatchpath, b'path1/path2')
    self.assertEqual(req.reponame, b'prefix/repo')
254 254
def testaltbaseurl(self):
    """An alternate base URL only affects advertised URLs and apppath.

    In every scenario ``url``, ``baseurl`` and ``urlscheme`` keep
    describing the request as it arrived at ``testserver``; the
    ``advertised*`` URLs and ``apppath`` are the ones derived from
    ``altbaseurl``.
    """

    def check(
        alt, url, advertisedurl, advertisedbaseurl, apppath, extra=None
    ):
        """Parse DEFAULT_ENV with ``alt`` as altbaseurl and verify it.

        When ``extra`` is given it always carries
        ``PATH_INFO=/path1/path2``, so the dispatch fields are expected
        to hold those two components; without it they must be empty.
        """
        if extra is None:
            req = parse(DEFAULT_ENV, altbaseurl=alt)
        else:
            req = parse(DEFAULT_ENV, altbaseurl=alt, extra=extra)

        self.assertEqual(req.url, url)
        self.assertEqual(req.baseurl, b'http://testserver')
        self.assertEqual(req.advertisedurl, advertisedurl)
        self.assertEqual(req.advertisedbaseurl, advertisedbaseurl)
        # URL scheme is defined as the actual scheme, not advertised.
        self.assertEqual(req.urlscheme, b'http')
        self.assertEqual(req.apppath, apppath)
        if extra is None:
            self.assertEqual(req.dispatchparts, [])
            self.assertIsNone(req.dispatchpath)
        else:
            self.assertEqual(req.dispatchparts, [b'path1', b'path2'])
            self.assertEqual(req.dispatchpath, b'path1/path2')
        self.assertIsNone(req.reponame)

    # Simple hostname remap.
    check(
        b'http://altserver',
        url=b'http://testserver',
        advertisedurl=b'http://altserver',
        advertisedbaseurl=b'http://altserver',
        apppath=b'',
    )

    # With a custom port.
    check(
        b'http://altserver:8000',
        url=b'http://testserver',
        advertisedurl=b'http://altserver:8000',
        advertisedbaseurl=b'http://altserver:8000',
        apppath=b'',
    )

    # With a changed protocol.
    check(
        b'https://altserver',
        url=b'http://testserver',
        advertisedurl=b'https://altserver',
        advertisedbaseurl=b'https://altserver',
        apppath=b'',
    )

    # Need to specify explicit port number for proper https:// alt URLs.
    check(
        b'https://altserver:443',
        url=b'http://testserver',
        advertisedurl=b'https://altserver',
        advertisedbaseurl=b'https://altserver',
        apppath=b'',
    )

    # With only PATH_INFO defined.
    check(
        b'http://altserver',
        url=b'http://testserver/path1/path2',
        advertisedurl=b'http://altserver/path1/path2',
        advertisedbaseurl=b'http://altserver',
        apppath=b'',
        extra={'PATH_INFO': '/path1/path2'},
    )

    # Path on alt URL.
    check(
        b'http://altserver/altpath',
        url=b'http://testserver',
        advertisedurl=b'http://altserver/altpath',
        advertisedbaseurl=b'http://altserver',
        apppath=b'/altpath',
    )

    # With a trailing slash.
    check(
        b'http://altserver/altpath/',
        url=b'http://testserver',
        advertisedurl=b'http://altserver/altpath/',
        advertisedbaseurl=b'http://altserver',
        apppath=b'/altpath/',
    )

    # PATH_INFO + path on alt URL.
    check(
        b'http://altserver/altpath',
        url=b'http://testserver/path1/path2',
        advertisedurl=b'http://altserver/altpath/path1/path2',
        advertisedbaseurl=b'http://altserver',
        apppath=b'/altpath',
        extra={'PATH_INFO': '/path1/path2'},
    )

    # PATH_INFO + path on alt URL with trailing slash.
    check(
        b'http://altserver/altpath/',
        url=b'http://testserver/path1/path2',
        advertisedurl=b'http://altserver/altpath//path1/path2',
        advertisedbaseurl=b'http://altserver',
        apppath=b'/altpath/',
        extra={'PATH_INFO': '/path1/path2'},
    )

    # Local SCRIPT_NAME is ignored.
    check(
        b'http://altserver',
        url=b'http://testserver/script/path1/path2',
        advertisedurl=b'http://altserver/path1/path2',
        advertisedbaseurl=b'http://altserver',
        apppath=b'',
        extra={'SCRIPT_NAME': '/script', 'PATH_INFO': '/path1/path2'},
    )

    # Use remote's path for script name, app path
    check(
        b'http://altserver/altroot',
        url=b'http://testserver/script/path1/path2',
        advertisedurl=b'http://altserver/altroot/path1/path2',
        advertisedbaseurl=b'http://altserver',
        apppath=b'/altroot',
        extra={'SCRIPT_NAME': '/script', 'PATH_INFO': '/path1/path2'},
    )

    # reponame is factored in properly. This scenario is spelled out in
    # full because it expects a repository name and does not check
    # urlscheme.
    req = parse(
        DEFAULT_ENV,
        reponame=b'repo',
        altbaseurl=b'http://altserver/altroot',
        extra={'SCRIPT_NAME': '/script', 'PATH_INFO': '/repo/path1/path2'},
    )

    self.assertEqual(
        req.url, b'http://testserver/script/repo/path1/path2'
    )
    self.assertEqual(req.baseurl, b'http://testserver')
    self.assertEqual(
        req.advertisedurl, b'http://altserver/altroot/repo/path1/path2'
    )
    self.assertEqual(req.advertisedbaseurl, b'http://altserver')
    self.assertEqual(req.apppath, b'/altroot/repo')
    self.assertEqual(req.dispatchparts, [b'path1', b'path2'])
    self.assertEqual(req.dispatchpath, b'path1/path2')
    self.assertEqual(req.reponame, b'repo')
434 434
def testenvencoding(self):
    """Native-string environ values are exposed as bytes in ``rawenv``."""
    if pycompat.iswindows:
        # On Windows, we can't generally know which non-ASCII characters
        # are supported.
        native, expected = 'bar', b'bar'
    else:
        # Unix is byte-based. Therefore we test all possible bytes.
        expected = b''.join(map(pycompat.bytechr, range(256)))
        native = pycompat.fsdecode(expected)

    req = parse(DEFAULT_ENV, extra={'foo': native})
    self.assertEqual(req.rawenv[b'foo'], expected)
446
435 447
if __name__ == '__main__':
    # Only when executed as a script (not on import): hand this module's
    # name to silenttestrunner.main().
    import silenttestrunner

    silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now