##// END OF EJS Templates
hgweb: parse WSGI request into a data structure...
Gregory Szorc -
r36824:69b2d090 default
parent child Browse files
Show More
@@ -316,6 +316,7 b' class hgweb(object):'
316 yield r
316 yield r
317
317
318 def _runwsgi(self, wsgireq, repo):
318 def _runwsgi(self, wsgireq, repo):
319 req = requestmod.parserequestfromenv(wsgireq.env)
319 rctx = requestcontext(self, repo)
320 rctx = requestcontext(self, repo)
320
321
321 # This state is global across all threads.
322 # This state is global across all threads.
@@ -329,14 +330,7 b' class hgweb(object):'
329 if h[0] != 'Content-Security-Policy']
330 if h[0] != 'Content-Security-Policy']
330 wsgireq.headers.append(('Content-Security-Policy', rctx.csp))
331 wsgireq.headers.append(('Content-Security-Policy', rctx.csp))
331
332
332 # work with CGI variables to create coherent structure
333 wsgireq.url = pycompat.sysstr(req.apppath)
333 # use SCRIPT_NAME, PATH_INFO and QUERY_STRING as well as our REPO_NAME
334
335 wsgireq.url = wsgireq.env[r'SCRIPT_NAME']
336 if not wsgireq.url.endswith(r'/'):
337 wsgireq.url += r'/'
338 if wsgireq.env.get('REPO_NAME'):
339 wsgireq.url += wsgireq.env[r'REPO_NAME'] + r'/'
340
334
341 if r'PATH_INFO' in wsgireq.env:
335 if r'PATH_INFO' in wsgireq.env:
342 parts = wsgireq.env[r'PATH_INFO'].strip(r'/').split(r'/')
336 parts = wsgireq.env[r'PATH_INFO'].strip(r'/').split(r'/')
@@ -11,6 +11,7 b' from __future__ import absolute_import'
11 import cgi
11 import cgi
12 import errno
12 import errno
13 import socket
13 import socket
14 #import wsgiref.validate
14
15
15 from .common import (
16 from .common import (
16 ErrorResponse,
17 ErrorResponse,
@@ -18,6 +19,9 b' from .common import ('
18 statusmessage,
19 statusmessage,
19 )
20 )
20
21
22 from ..thirdparty import (
23 attr,
24 )
21 from .. import (
25 from .. import (
22 pycompat,
26 pycompat,
23 util,
27 util,
@@ -54,6 +58,124 b' def normalize(form):'
54 pycompat.bytesurl(i.strip()) for i in v]
58 pycompat.bytesurl(i.strip()) for i in v]
55 return bytesform
59 return bytesform
56
60
@attr.s(frozen=True)
class parsedrequest(object):
    """Immutable record of the URL-related pieces of a WSGI request."""

    # Complete URL of the request.
    url = attr.ib()
    # Scheme, host and port only (<proto>://<host><port>); no path.
    baseurl = attr.ib()
    # Counterparts of ``url`` and ``baseurl`` whose hostname comes from
    # SERVER_NAME rather than the HTTP ``Host`` request header. This is
    # likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # Path of the WSGI application.
    apppath = attr.ib()
    # Path components, as a list, that request dispatch operates on.
    dispatchparts = attr.ib()
    # Path portion of the URL (query string excluded) used for dispatch.
    dispatchpath = attr.ib()
    # Unparsed query string (whatever follows "?" in the URL).
    querystring = attr.ib()
81
def parserequestfromenv(env):
    """Parse URL components from a WSGI environment dictionary.

    WSGI defines request attributes via environment variables. This function
    parses those variables into a ``parsedrequest`` instance.

    ``env`` is the WSGI ``environ`` mapping. ``wsgi.url_scheme``,
    ``SERVER_NAME`` and ``SERVER_PORT`` are always read and raise ``KeyError``
    if missing. ``HTTP_HOST``, ``SCRIPT_NAME``, ``PATH_INFO``,
    ``QUERY_STRING`` and ``REPO_NAME`` are optional; a missing value is
    treated like an empty one.

    Returns a ``parsedrequest`` holding the reconstructed URLs, the
    application path, the dispatch path (as a list and as a string) and the
    raw query string.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO validate the incoming object with
    # wsgiref.validate.check_environ(env) once we fix internal violations.

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points of the Unicode
    # strings on Python 3 must be between U+0000 and U+00FF. We deal with
    # bytes in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        # ``.items()`` is used because this branch only runs on Python 3,
        # where dicts have no ``iteritems()``. The codec name is spelled
        # r'...' so it stays a native str for ``str.encode()``.
        env = {k.encode(r'latin-1'):
               v.encode(r'latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    scheme = env['wsgi.url_scheme']
    # The port is only spelled out in the URL when it is not the scheme's
    # default one.
    defaultport = '443' if scheme == 'https' else '80'

    def addport(s):
        port = env['SERVER_PORT']
        if port != defaultport:
            s += ':' + port
        return s

    fullurl = scheme + '://'
    advertisedfullurl = fullurl

    # The Host header, when sent, already carries any non-default port.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    # PEP-0333 allows CGI variables whose value would be empty to be
    # omitted, so SCRIPT_NAME is read with a default everywhere.
    scriptname = env.get('SCRIPT_NAME', '')
    reponame = env.get('REPO_NAME', '')
    querystring = env.get('QUERY_STRING', '')

    quotedpath = (util.urlreq.quote(scriptname) +
                  util.urlreq.quote(env.get('PATH_INFO', '')))
    fullurl += quotedpath
    advertisedfullurl += quotedpath

    if querystring:
        fullurl += '?' + querystring
        advertisedfullurl += '?' + querystring

    # When dispatching requests, we look at the URL components (PATH_INFO
    # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
    # has the concept of "virtual" repositories. This is defined via REPO_NAME.
    # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
    # root. We also exclude its path components from PATH_INFO when resolving
    # the dispatch path.

    # TODO the use of trailing slashes in apppath is arguably wrong. We need it
    # to appease low-level parts of hgweb_mod for now.
    apppath = scriptname
    if not apppath.endswith('/'):
        apppath += '/'

    if reponame:
        apppath += reponame + '/'

    if 'PATH_INFO' in env:
        dispatchparts = env['PATH_INFO'].strip('/').split('/')

        # Strip out repo parts. With no REPO_NAME, ``repoparts`` is ['']
        # (''.split('/')), which only matches an empty PATH_INFO and turns
        # its [''] into [].
        repoparts = reponame.split('/')
        if dispatchparts[:len(repoparts)] == repoparts:
            dispatchparts = dispatchparts[len(repoparts):]
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    return parsedrequest(url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         apppath=apppath,
                         dispatchparts=dispatchparts,
                         dispatchpath=dispatchpath,
                         querystring=querystring)
178
57 class wsgirequest(object):
179 class wsgirequest(object):
58 """Higher-level API for a WSGI request.
180 """Higher-level API for a WSGI request.
59
181
General Comments 0
You need to be logged in to leave comments. Login now