##// END OF EJS Templates
hgweb: teach WSGI parser about query strings...
Gregory Szorc -
r36827:3c15b84a default
parent child Browse files
Show More
@@ -1,279 +1,296 b''
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import cgi
12 12 import errno
13 13 import socket
14 14 #import wsgiref.validate
15 15
16 16 from .common import (
17 17 ErrorResponse,
18 18 HTTP_NOT_MODIFIED,
19 19 statusmessage,
20 20 )
21 21
22 22 from ..thirdparty import (
23 23 attr,
24 24 )
25 25 from .. import (
26 26 pycompat,
27 27 util,
28 28 )
29 29
# Maps a short query-string key to the (name, value) pairs it expands to.
# A value of None means "reuse whatever was supplied for the short key".
shortcuts = {
    'cl': [
        ('cmd', ['changelog']),
        ('rev', None),
    ],
    'sl': [
        ('cmd', ['shortlog']),
        ('rev', None),
    ],
    'cs': [
        ('cmd', ['changeset']),
        ('node', None),
    ],
    'f': [
        ('cmd', ['file']),
        ('filenode', None),
    ],
    'fl': [
        ('cmd', ['filelog']),
        ('filenode', None),
    ],
    'fd': [
        ('cmd', ['filediff']),
        ('node', None),
    ],
    'fa': [
        ('cmd', ['annotate']),
        ('filenode', None),
    ],
    'mf': [
        ('cmd', ['manifest']),
        ('manifest', None),
    ],
    'ca': [
        ('cmd', ['archive']),
        ('node', None),
    ],
    'tags': [
        ('cmd', ['tags']),
    ],
    'tip': [
        ('cmd', ['changeset']),
        ('node', ['tip']),
    ],
    'static': [
        ('cmd', ['static']),
        ('file', None),
    ],
}
44 44
def normalize(form):
    """Expand shortcut keys in ``form`` and return a stripped bytes copy.

    ``form`` maps parameter names to lists of values. It is modified in
    place: every key that appears in ``shortcuts`` is replaced by the
    parameters it stands for. The returned dict is a new object whose keys
    and whitespace-stripped values went through ``pycompat.bytesurl``.
    """
    # Step 1: rewrite each shortcut that is present into its long form.
    for short, expansion in shortcuts.items():
        if short not in form:
            continue
        for name, value in expansion:
            # None is a placeholder for the values given to the shortcut.
            form[name] = form[short] if value is None else value
        del form[short]

    # Step 2: strip every value and convert keys and values alike.
    tobytes = pycompat.bytesurl
    return {tobytes(key): [tobytes(item.strip()) for item in values]
            for key, values in form.iteritems()}
60 60
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request / static HTTP request parameters.

    Instances are frozen; every field is supplied at construction time.
    """

    # --- URLs -------------------------------------------------------------
    # Complete URL of the request.
    url = attr.ib()
    # ``url`` with all path components removed: <proto>://<host><port>.
    baseurl = attr.ib()
    # Same as ``url`` / ``baseurl`` except that the hostname comes from
    # SERVER_NAME rather than the HTTP Host header. This is likely what
    # clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()

    # --- Dispatch ---------------------------------------------------------
    # Path of the WSGI application.
    apppath = attr.ib()
    # Path segments (a list) consulted when dispatching.
    dispatchparts = attr.ib()
    # The dispatch path as a single string, without the query string.
    dispatchpath = attr.ib()

    # --- Query string -----------------------------------------------------
    # Unparsed query string (everything after "?" in the URL).
    querystring = attr.ib()
    # Query string arguments as a list of 2-tuples.
    querystringlist = attr.ib()
    # Query string arguments as a dict. Values are lists with at least
    # 1 item.
    querystringdict = attr.ib()
81 85
def parserequestfromenv(env):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a ``parsedrequest`` instance.

    ``env`` must provide ``wsgi.url_scheme``, ``SERVER_NAME`` and
    ``SERVER_PORT``; a missing one raises ``KeyError``. ``HTTP_HOST``,
    ``SCRIPT_NAME``, ``PATH_INFO``, ``QUERY_STRING`` and ``REPO_NAME`` are
    optional and treated as empty when absent.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.iteritems()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.iteritems()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    scheme = env['wsgi.url_scheme']
    fullurl = scheme + '://'
    advertisedfullurl = fullurl

    def addport(s):
        # Only spell out the port when it is not the scheme's default.
        defaultport = '443' if scheme == 'https' else '80'
        if env['SERVER_PORT'] != defaultport:
            s += ':' + env['SERVER_PORT']
        return s

    # The Host header, when present, already carries any port.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    # Both URL flavours share the same (quoted) path and query string.
    quotedpath = (util.urlreq.quote(env.get('SCRIPT_NAME', '')) +
                  util.urlreq.quote(env.get('PATH_INFO', '')))
    fullurl += quotedpath
    advertisedfullurl += quotedpath

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # When dispatching requests, we look at the URL components (PATH_INFO
    # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
    # has the concept of "virtual" repositories. This is defined via REPO_NAME.
    # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
    # root. We also exclude its path components from PATH_INFO when resolving
    # the dispatch path.

    # PEP-0333 lets servers omit SCRIPT_NAME when it would be empty, so do
    # not index it directly (the URL reconstruction above tolerates its
    # absence as well).
    apppath = env.get('SCRIPT_NAME', '')

    if env.get('REPO_NAME'):
        if not apppath.endswith('/'):
            apppath += '/'

        apppath += env.get('REPO_NAME')

    if 'PATH_INFO' in env:
        dispatchparts = env['PATH_INFO'].strip('/').split('/')

        # Strip out repo parts.
        repoparts = env.get('REPO_NAME', '').split('/')
        if dispatchparts[:len(repoparts)] == repoparts:
            dispatchparts = dispatchparts[len(repoparts):]
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    querystring = env.get('QUERY_STRING', '')

    # We store as a list so we have ordering information. We also store as
    # a dict to facilitate fast lookup.
    querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)

    querystringdict = {}
    for k, v in querystringlist:
        querystringdict.setdefault(k, []).append(v)

    return parsedrequest(url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         querystring=querystring,
                         querystringlist=querystringlist,
                         querystringdict=querystringdict)
177 194
class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """
    def __init__(self, wsgienv, start_response):
        """Wrap the WSGI environment and ``start_response`` callable.

        Raises ``RuntimeError`` unless ``wsgi.version`` is a 1.x version.
        The request form is parsed immediately from ``wsgi.input``.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)
        self.inp = wsgienv[r'wsgi.input']
        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.form = normalize(cgi.parse(self.inp,
                                        self.env,
                                        keep_blank_values=1))
        # Cleared once the response headers have been sent.
        self._start_response = start_response
        # The write callable returned by start_response(), once known.
        self.server_write = None
        # Pending (name, value) response headers.
        self.headers = []

    def __iter__(self):
        return iter([])

    def read(self, count=-1):
        """Read up to ``count`` bytes from the request body."""
        return self.inp.read(count)

    def drain(self):
        '''need to read all data from request, httplib is half-duplex'''
        length = int(self.env.get('CONTENT_LENGTH') or 0)
        for s in util.filechunkiter(self.inp, limit=length):
            pass

    def respond(self, status, type, filename=None, body=None):
        """Send the response headers (first call only) and optional body.

        ``status`` may be an ``ErrorResponse``, an integer status code or an
        already formatted status line. ``type`` is the Content-Type value.
        ``filename``, when given, adds an inline Content-Disposition header
        using only the part after the last "/". When ``body`` is given it
        is written out and the write callable is dropped afterwards.

        Raises ``TypeError`` if a pending header value is not a ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                # Escape backslashes and double quotes for the quoted string.
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                if status.code == HTTP_NOT_MODIFIED:
                    # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
                    # it MUST NOT include any headers other than these and no
                    # body
                    self.headers = [(k, v) for (k, v) in self.headers if
                                    k in ('Date', 'ETag', 'Expires',
                                          'Cache-Control', 'Vary')]
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            self._start_response = None
            self.headers = []
        if body is not None:
            self.write(body)
            self.server_write = None

    def write(self, thing):
        """Write ``thing`` to the client; empty values are ignored.

        A connection reset by the peer is swallowed; any other socket error
        propagates to the caller.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Use the errno attribute rather than indexing the
                # exception: exceptions are not subscriptable on Python 3.
                if inst.errno != errno.ECONNRESET:
                    raise

    def writelines(self, lines):
        """Write every item of ``lines`` via ``write()``."""
        for line in lines:
            self.write(line)

    def flush(self):
        return None

    def close(self):
        return None
272 289
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
    can and should now be used as a WSGI application.

    ``app_maker`` is called once, right away, with no arguments. The
    returned function forwards ``(env, respond)`` to the application it
    produced and hands back that application's result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        return wsgiapp(env, respond)

    return run_wsgi
@@ -1,190 +1,192 b''
1 1 # urllibcompat.py - adapters to ease using urllib2 on Py2 and urllib on Py3
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 from . import pycompat
10 10
11 11 _sysstr = pycompat.sysstr
12 12
class _pycompatstub(object):
    """Namespace whose attributes are looked up lazily from aliases.

    An alias maps an attribute name on this object to an attribute of some
    origin object. The lookup happens on first access and the result is
    then stored on the instance.
    """

    def __init__(self):
        # alias name -> (origin object, attribute name on the origin)
        self._aliases = {}

    def _registeraliases(self, origin, items):
        """Add items that will be populated at the first access"""
        underscore = _sysstr('_')
        empty = _sysstr('')
        for item in map(_sysstr, items):
            # The alias is the attribute name lowercased, minus underscores.
            alias = item.replace(underscore, empty).lower()
            self._aliases[alias] = (origin, item)

    def _registeralias(self, origin, attr, name):
        """Alias ``origin``.``attr`` as ``name``"""
        target = (origin, _sysstr(attr))
        self._aliases[_sysstr(name)] = target

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails, i.e. before the
        # alias has been resolved and stored in the instance dict.
        if name not in self._aliases:
            raise AttributeError(name)
        origin, item = self._aliases[name]
        resolved = getattr(origin, item)
        self.__dict__[name] = resolved
        return resolved
35 35
# Namespaces whose members are resolved on first attribute access.
httpserver = _pycompatstub()
urlreq = _pycompatstub()
urlerr = _pycompatstub()

if pycompat.ispy3:
    import http.server
    import urllib.error
    import urllib.parse
    import urllib.request
    import urllib.response

    urlreq._registeraliases(urllib.parse, (
        "splitattr",
        "splitpasswd",
        "splitport",
        "splituser",
        "urlparse",
        "urlunparse",
    ))
    urlreq._registeralias(urllib.parse, "parse_qs", "parseqs")
    urlreq._registeralias(urllib.parse, "parse_qsl", "parseqsl")
    urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
    urlreq._registeraliases(urllib.request, (
        "AbstractHTTPHandler",
        "BaseHandler",
        "build_opener",
        "FileHandler",
        "FTPHandler",
        "ftpwrapper",
        "HTTPHandler",
        "HTTPSHandler",
        "install_opener",
        "pathname2url",
        "HTTPBasicAuthHandler",
        "HTTPDigestAuthHandler",
        "HTTPPasswordMgrWithDefaultRealm",
        "ProxyHandler",
        "Request",
        "url2pathname",
        "urlopen",
    ))
    urlreq._registeraliases(urllib.response, (
        "addclosehook",
        "addinfourl",
    ))
    urlerr._registeraliases(urllib.error, (
        "HTTPError",
        "URLError",
    ))
    httpserver._registeraliases(http.server, (
        "HTTPServer",
        "BaseHTTPRequestHandler",
        "SimpleHTTPRequestHandler",
        "CGIHTTPRequestHandler",
    ))

    # urllib.parse.quote() accepts both str and bytes, decodes bytes
    # (if necessary), and returns str. This is wonky. We provide a custom
    # implementation that only accepts bytes and emits bytes.
    def quote(s, safe=r'/'):
        quoted = urllib.parse.quote_from_bytes(s, safe=safe)
        return quoted.encode('ascii', 'strict')

    # urllib.parse.urlencode() returns str. We use this function to make
    # sure we return bytes.
    def urlencode(query, doseq=False):
        encoded = urllib.parse.urlencode(query, doseq=doseq)
        return encoded.encode('ascii')

    # Plain attributes: these take precedence over any lazy alias.
    urlreq.quote = quote
    urlreq.urlencode = urlencode

    # Request accessors: attribute based on Python 3.
    def getfullurl(req):
        return req.full_url

    def gethost(req):
        return req.host

    def getselector(req):
        return req.selector

    def getdata(req):
        return req.data

    def hasdata(req):
        return req.data is not None
else:
    import BaseHTTPServer
    import CGIHTTPServer
    import SimpleHTTPServer
    import urllib2
    import urllib
    import urlparse

    urlreq._registeraliases(urllib, (
        "addclosehook",
        "addinfourl",
        "ftpwrapper",
        "pathname2url",
        "quote",
        "splitattr",
        "splitpasswd",
        "splitport",
        "splituser",
        "unquote",
        "url2pathname",
        "urlencode",
    ))
    urlreq._registeraliases(urllib2, (
        "AbstractHTTPHandler",
        "BaseHandler",
        "build_opener",
        "FileHandler",
        "FTPHandler",
        "HTTPBasicAuthHandler",
        "HTTPDigestAuthHandler",
        "HTTPHandler",
        "HTTPPasswordMgrWithDefaultRealm",
        "HTTPSHandler",
        "install_opener",
        "ProxyHandler",
        "Request",
        "urlopen",
    ))
    urlreq._registeraliases(urlparse, (
        "urlparse",
        "urlunparse",
    ))
    urlreq._registeralias(urlparse, "parse_qs", "parseqs")
    urlreq._registeralias(urlparse, "parse_qsl", "parseqsl")
    urlerr._registeraliases(urllib2, (
        "HTTPError",
        "URLError",
    ))
    httpserver._registeraliases(BaseHTTPServer, (
        "HTTPServer",
        "BaseHTTPRequestHandler",
    ))
    httpserver._registeraliases(SimpleHTTPServer, (
        "SimpleHTTPRequestHandler",
    ))
    httpserver._registeraliases(CGIHTTPServer, (
        "CGIHTTPRequestHandler",
    ))

    # Request accessors: method based on Python 2.
    def getfullurl(req):
        return req.get_full_url()

    def gethost(req):
        return req.get_host()

    def getselector(req):
        return req.get_selector()

    def getdata(req):
        return req.get_data()

    def hasdata(req):
        return req.has_data()
General Comments 0
You need to be logged in to leave comments. Login now