Show More
@@ -316,6 +316,7 b' class hgweb(object):' | |||
|
316 | 316 | yield r |
|
317 | 317 | |
|
318 | 318 | def _runwsgi(self, wsgireq, repo): |
|
319 | req = requestmod.parserequestfromenv(wsgireq.env) | |
|
319 | 320 | rctx = requestcontext(self, repo) |
|
320 | 321 | |
|
321 | 322 | # This state is global across all threads. |
@@ -329,14 +330,7 b' class hgweb(object):' | |||
|
329 | 330 | if h[0] != 'Content-Security-Policy'] |
|
330 | 331 | wsgireq.headers.append(('Content-Security-Policy', rctx.csp)) |
|
331 | 332 | |
|
332 | # work with CGI variables to create coherent structure | |
|
333 | # use SCRIPT_NAME, PATH_INFO and QUERY_STRING as well as our REPO_NAME | |
|
334 | ||
|
335 | wsgireq.url = wsgireq.env[r'SCRIPT_NAME'] | |
|
336 | if not wsgireq.url.endswith(r'/'): | |
|
337 | wsgireq.url += r'/' | |
|
338 | if wsgireq.env.get('REPO_NAME'): | |
|
339 | wsgireq.url += wsgireq.env[r'REPO_NAME'] + r'/' | |
|
333 | wsgireq.url = pycompat.sysstr(req.apppath) | |
|
340 | 334 | |
|
341 | 335 | if r'PATH_INFO' in wsgireq.env: |
|
342 | 336 | parts = wsgireq.env[r'PATH_INFO'].strip(r'/').split(r'/') |
@@ -11,6 +11,7 b' from __future__ import absolute_import' | |||
|
11 | 11 | import cgi |
|
12 | 12 | import errno |
|
13 | 13 | import socket |
|
14 | #import wsgiref.validate | |
|
14 | 15 | |
|
15 | 16 | from .common import ( |
|
16 | 17 | ErrorResponse, |
@@ -18,6 +19,9 b' from .common import (' | |||
|
18 | 19 | statusmessage, |
|
19 | 20 | ) |
|
20 | 21 | |
|
22 | from ..thirdparty import ( | |
|
23 | attr, | |
|
24 | ) | |
|
21 | 25 | from .. import ( |
|
22 | 26 | pycompat, |
|
23 | 27 | util, |
@@ -54,6 +58,124 b' def normalize(form):' | |||
|
54 | 58 | pycompat.bytesurl(i.strip()) for i in v] |
|
55 | 59 | return bytesform |
|
56 | 60 | |
|
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request / static HTTP request parameters.

    Attributes, in declaration (and therefore constructor) order:

    url
       Full URL for this request.
    baseurl
       URL without any path components. Just <proto>://<host><port>.
    advertisedurl
       Like ``url`` but uses SERVER_NAME instead of the HTTP Host header
       for the hostname. This is likely what clients used.
    advertisedbaseurl
       Like ``baseurl`` but uses SERVER_NAME instead of the HTTP Host
       header for the hostname.
    apppath
       WSGI application path.
    dispatchparts
       List of path parts to be used for dispatch.
    dispatchpath
       URL path component (no query string) used for dispatch.
    querystring
       Raw query string (part after "?" in URL).
    """

    # Do not reorder: attrs derives the positional constructor signature
    # from the declaration order of these fields.
    url = attr.ib()
    baseurl = attr.ib()
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    apppath = attr.ib()
    dispatchparts = attr.ib()
    dispatchpath = attr.ib()
    querystring = attr.ib()
|
81 | ||
|
def parserequestfromenv(env):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    ``env`` is the WSGI environment dict. ``wsgi.url_scheme``,
    ``SERVER_NAME`` and ``SERVER_PORT`` must be present (``KeyError``
    otherwise). ``HTTP_HOST``, ``SCRIPT_NAME``, ``PATH_INFO``,
    ``QUERY_STRING`` and ``REPO_NAME`` are optional.

    Returns a ``parsedrequest`` instance.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between U+0000 and U+00FF. We deal with
    # bytes in Mercurial, so mass convert string keys and values to bytes.
    # Non-string values (e.g. ``wsgi.input``) are passed through untouched.
    if pycompat.ispy3:
        env = {k.encode('latin-1'):
               v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    scheme = env['wsgi.url_scheme']
    fullurl = scheme + '://'
    advertisedfullurl = fullurl

    def addport(s):
        # Only spell out the port when it differs from the scheme's default.
        port = env['SERVER_PORT']
        if scheme == 'https':
            defaultport = '443'
        else:
            defaultport = '80'

        if port != defaultport:
            s += ':' + port

        return s

    # The Host header is used verbatim when the client sent one; the port is
    # only appended when falling back to SERVER_NAME.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    # The advertised URL always derives from SERVER_NAME / SERVER_PORT.
    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    quotedpath = (util.urlreq.quote(env.get('SCRIPT_NAME', '')) +
                  util.urlreq.quote(env.get('PATH_INFO', '')))
    fullurl += quotedpath
    advertisedfullurl += quotedpath

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # When dispatching requests, we look at the URL components (PATH_INFO
    # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
    # has the concept of "virtual" repositories. This is defined via REPO_NAME.
    # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
    # root. We also exclude its path components from PATH_INFO when resolving
    # the dispatch path.

    # TODO the use of trailing slashes in apppath is arguably wrong. We need it
    # to appease low-level parts of hgweb_mod for now.
    #
    # SCRIPT_NAME is read with a default here, matching the URL
    # reconstruction above, so an environment without the key yields an app
    # path of '/' instead of raising KeyError.
    apppath = env.get('SCRIPT_NAME', '')
    if not apppath.endswith('/'):
        apppath += '/'

    if env.get('REPO_NAME'):
        apppath += env.get('REPO_NAME') + '/'

    if 'PATH_INFO' in env:
        dispatchparts = env['PATH_INFO'].strip('/').split('/')

        # Strip out repo parts. Without REPO_NAME, repoparts is [''], which
        # only matches the [''] produced by an empty or all-slash PATH_INFO
        # and so normalizes that case to an empty list.
        repoparts = env.get('REPO_NAME', '').split('/')
        if dispatchparts[:len(repoparts)] == repoparts:
            dispatchparts = dispatchparts[len(repoparts):]
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    querystring = env.get('QUERY_STRING', '')

    return parsedrequest(url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         querystring=querystring)
|
178 | ||
|
57 | 179 | class wsgirequest(object): |
|
58 | 180 | """Higher-level API for a WSGI request. |
|
59 | 181 |
General Comments 0
You need to be logged in to leave comments.
Login now