##// END OF EJS Templates
hgweb: remove support for short query string based aliases (BC)...
Gregory Szorc -
r36875:422be995 default
parent child Browse files
Show More
@@ -1,370 +1,339 b''
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import socket
13 13 import wsgiref.headers as wsgiheaders
14 14 #import wsgiref.validate
15 15
16 16 from .common import (
17 17 ErrorResponse,
18 18 HTTP_NOT_MODIFIED,
19 19 statusmessage,
20 20 )
21 21
22 22 from ..thirdparty import (
23 23 attr,
24 24 )
25 25 from .. import (
26 26 pycompat,
27 27 util,
28 28 )
29 29
30 shortcuts = {
31 'cl': [('cmd', ['changelog']), ('rev', None)],
32 'sl': [('cmd', ['shortlog']), ('rev', None)],
33 'cs': [('cmd', ['changeset']), ('node', None)],
34 'f': [('cmd', ['file']), ('filenode', None)],
35 'fl': [('cmd', ['filelog']), ('filenode', None)],
36 'fd': [('cmd', ['filediff']), ('node', None)],
37 'fa': [('cmd', ['annotate']), ('filenode', None)],
38 'mf': [('cmd', ['manifest']), ('manifest', None)],
39 'ca': [('cmd', ['archive']), ('node', None)],
40 'tags': [('cmd', ['tags'])],
41 'tip': [('cmd', ['changeset']), ('node', ['tip'])],
42 'static': [('cmd', ['static']), ('file', None)]
43 }
44
45 def normalize(form):
46 # first expand the shortcuts
47 for k in shortcuts:
48 if k in form:
49 for name, value in shortcuts[k]:
50 if value is None:
51 value = form[k]
52 form[name] = value
53 del form[k]
54 # And strip the values
55 bytesform = {}
56 for k, v in form.iteritems():
57 bytesform[pycompat.bytesurl(k)] = [
58 pycompat.bytesurl(i.strip()) for i in v]
59 return bytesform
60
61 30 @attr.s(frozen=True)
62 31 class parsedrequest(object):
63 32 """Represents a parsed WSGI request.
64 33
65 34 Contains both parsed parameters as well as a handle on the input stream.
66 35 """
67 36
68 37 # Request method.
69 38 method = attr.ib()
70 39 # Full URL for this request.
71 40 url = attr.ib()
72 41 # URL without any path components. Just <proto>://<host><port>.
73 42 baseurl = attr.ib()
74 43 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
75 44 # of HTTP: Host header for hostname. This is likely what clients used.
76 45 advertisedurl = attr.ib()
77 46 advertisedbaseurl = attr.ib()
78 47 # WSGI application path.
79 48 apppath = attr.ib()
80 49 # List of path parts to be used for dispatch.
81 50 dispatchparts = attr.ib()
82 51 # URL path component (no query string) used for dispatch.
83 52 dispatchpath = attr.ib()
84 53 # Whether there is a path component to this request. This can be true
85 54 # when ``dispatchpath`` is empty due to REPO_NAME muckery.
86 55 havepathinfo = attr.ib()
87 56 # Raw query string (part after "?" in URL).
88 57 querystring = attr.ib()
89 58 # List of 2-tuples of query string arguments.
90 59 querystringlist = attr.ib()
91 60 # Dict of query string arguments. Values are lists with at least 1 item.
92 61 querystringdict = attr.ib()
93 62 # wsgiref.headers.Headers instance. Operates like a dict with case
94 63 # insensitive keys.
95 64 headers = attr.ib()
96 65 # Request body input stream.
97 66 bodyfh = attr.ib()
98 67
99 68 def parserequestfromenv(env, bodyfh):
100 69 """Parse URL components from environment variables.
101 70
102 71 WSGI defines request attributes via environment variables. This function
103 72 parses the environment variables into a data structure.
104 73 """
105 74 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
106 75
107 76 # We first validate that the incoming object conforms with the WSGI spec.
108 77 # We only want to be dealing with spec-conforming WSGI implementations.
109 78 # TODO enable this once we fix internal violations.
110 79 #wsgiref.validate.check_environ(env)
111 80
112 81 # PEP-0333 states that environment keys and values are native strings
113 82 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
114 83 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
115 84 # in Mercurial, so mass convert string keys and values to bytes.
116 85 if pycompat.ispy3:
117 86 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
118 87 env = {k: v.encode('latin-1') if isinstance(v, str) else v
119 88 for k, v in env.iteritems()}
120 89
121 90 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
122 91 # the environment variables.
123 92 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
124 93 # how URLs are reconstructed.
125 94 fullurl = env['wsgi.url_scheme'] + '://'
126 95 advertisedfullurl = fullurl
127 96
128 97 def addport(s):
129 98 if env['wsgi.url_scheme'] == 'https':
130 99 if env['SERVER_PORT'] != '443':
131 100 s += ':' + env['SERVER_PORT']
132 101 else:
133 102 if env['SERVER_PORT'] != '80':
134 103 s += ':' + env['SERVER_PORT']
135 104
136 105 return s
137 106
138 107 if env.get('HTTP_HOST'):
139 108 fullurl += env['HTTP_HOST']
140 109 else:
141 110 fullurl += env['SERVER_NAME']
142 111 fullurl = addport(fullurl)
143 112
144 113 advertisedfullurl += env['SERVER_NAME']
145 114 advertisedfullurl = addport(advertisedfullurl)
146 115
147 116 baseurl = fullurl
148 117 advertisedbaseurl = advertisedfullurl
149 118
150 119 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
151 120 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
152 121 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
153 122 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
154 123
155 124 if env.get('QUERY_STRING'):
156 125 fullurl += '?' + env['QUERY_STRING']
157 126 advertisedfullurl += '?' + env['QUERY_STRING']
158 127
159 128 # When dispatching requests, we look at the URL components (PATH_INFO
160 129 # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
161 130 # has the concept of "virtual" repositories. This is defined via REPO_NAME.
162 131 # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
163 132 # root. We also exclude its path components from PATH_INFO when resolving
164 133 # the dispatch path.
165 134
166 135 apppath = env['SCRIPT_NAME']
167 136
168 137 if env.get('REPO_NAME'):
169 138 if not apppath.endswith('/'):
170 139 apppath += '/'
171 140
172 141 apppath += env.get('REPO_NAME')
173 142
174 143 if 'PATH_INFO' in env:
175 144 dispatchparts = env['PATH_INFO'].strip('/').split('/')
176 145
177 146 # Strip out repo parts.
178 147 repoparts = env.get('REPO_NAME', '').split('/')
179 148 if dispatchparts[:len(repoparts)] == repoparts:
180 149 dispatchparts = dispatchparts[len(repoparts):]
181 150 else:
182 151 dispatchparts = []
183 152
184 153 dispatchpath = '/'.join(dispatchparts)
185 154
186 155 querystring = env.get('QUERY_STRING', '')
187 156
188 157 # We store as a list so we have ordering information. We also store as
189 158 # a dict to facilitate fast lookup.
190 159 querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)
191 160
192 161 querystringdict = {}
193 162 for k, v in querystringlist:
194 163 if k in querystringdict:
195 164 querystringdict[k].append(v)
196 165 else:
197 166 querystringdict[k] = [v]
198 167
199 168 # HTTP_* keys contain HTTP request headers. The Headers structure should
200 169 # perform case normalization for us. We just rewrite underscore to dash
201 170 # so keys match what likely went over the wire.
202 171 headers = []
203 172 for k, v in env.iteritems():
204 173 if k.startswith('HTTP_'):
205 174 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
206 175
207 176 headers = wsgiheaders.Headers(headers)
208 177
209 178 # This is kind of a lie because the HTTP header wasn't explicitly
210 179 # sent. But for all intents and purposes it should be OK to lie about
211 180 # this, since a consumer will use either value to determine how many
212 181 # bytes are available to read.
213 182 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
214 183 headers['Content-Length'] = env['CONTENT_LENGTH']
215 184
216 185 # TODO do this once we remove wsgirequest.inp, otherwise we could have
217 186 # multiple readers from the underlying input stream.
218 187 #bodyfh = env['wsgi.input']
219 188 #if 'Content-Length' in headers:
220 189 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
221 190
222 191 return parsedrequest(method=env['REQUEST_METHOD'],
223 192 url=fullurl, baseurl=baseurl,
224 193 advertisedurl=advertisedfullurl,
225 194 advertisedbaseurl=advertisedbaseurl,
226 195 apppath=apppath,
227 196 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
228 197 havepathinfo='PATH_INFO' in env,
229 198 querystring=querystring,
230 199 querystringlist=querystringlist,
231 200 querystringdict=querystringdict,
232 201 headers=headers,
233 202 bodyfh=bodyfh)
234 203
235 204 class wsgirequest(object):
236 205 """Higher-level API for a WSGI request.
237 206
238 207 WSGI applications are invoked with 2 arguments. They are used to
239 208 instantiate instances of this class, which provides higher-level APIs
240 209 for obtaining request parameters, writing HTTP output, etc.
241 210 """
242 211 def __init__(self, wsgienv, start_response):
243 212 version = wsgienv[r'wsgi.version']
244 213 if (version < (1, 0)) or (version >= (2, 0)):
245 214 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
246 215 % version)
247 216
248 217 inp = wsgienv[r'wsgi.input']
249 218
250 219 if r'HTTP_CONTENT_LENGTH' in wsgienv:
251 220 inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
252 221 elif r'CONTENT_LENGTH' in wsgienv:
253 222 inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
254 223
255 224 self.err = wsgienv[r'wsgi.errors']
256 225 self.threaded = wsgienv[r'wsgi.multithread']
257 226 self.multiprocess = wsgienv[r'wsgi.multiprocess']
258 227 self.run_once = wsgienv[r'wsgi.run_once']
259 228 self.env = wsgienv
260 229 self.req = parserequestfromenv(wsgienv, inp)
261 self.form = normalize(self.req.querystringdict)
230 self.form = self.req.querystringdict
262 231 self._start_response = start_response
263 232 self.server_write = None
264 233 self.headers = []
265 234
266 235 def respond(self, status, type, filename=None, body=None):
267 236 if not isinstance(type, str):
268 237 type = pycompat.sysstr(type)
269 238 if self._start_response is not None:
270 239 self.headers.append((r'Content-Type', type))
271 240 if filename:
272 241 filename = (filename.rpartition('/')[-1]
273 242 .replace('\\', '\\\\').replace('"', '\\"'))
274 243 self.headers.append(('Content-Disposition',
275 244 'inline; filename="%s"' % filename))
276 245 if body is not None:
277 246 self.headers.append((r'Content-Length', str(len(body))))
278 247
279 248 for k, v in self.headers:
280 249 if not isinstance(v, str):
281 250 raise TypeError('header value must be string: %r' % (v,))
282 251
283 252 if isinstance(status, ErrorResponse):
284 253 self.headers.extend(status.headers)
285 254 if status.code == HTTP_NOT_MODIFIED:
286 255 # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
287 256 # it MUST NOT include any headers other than these and no
288 257 # body
289 258 self.headers = [(k, v) for (k, v) in self.headers if
290 259 k in ('Date', 'ETag', 'Expires',
291 260 'Cache-Control', 'Vary')]
292 261 status = statusmessage(status.code, pycompat.bytestr(status))
293 262 elif status == 200:
294 263 status = '200 Script output follows'
295 264 elif isinstance(status, int):
296 265 status = statusmessage(status)
297 266
298 267 # Various HTTP clients (notably httplib) won't read the HTTP
299 268 # response until the HTTP request has been sent in full. If servers
300 269 # (us) send a response before the HTTP request has been fully sent,
301 270 # the connection may deadlock because neither end is reading.
302 271 #
303 272 # We work around this by "draining" the request data before
304 273 # sending any response in some conditions.
305 274 drain = False
306 275 close = False
307 276
308 277 # If the client sent Expect: 100-continue, we assume it is smart
309 278 # enough to deal with the server sending a response before reading
310 279 # the request. (httplib doesn't do this.)
311 280 if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
312 281 pass
313 282 # Only tend to request methods that have bodies. Strictly speaking,
314 283 # we should sniff for a body. But this is fine for our existing
315 284 # WSGI applications.
316 285 elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
317 286 pass
318 287 else:
319 288 # If we don't know how much data to read, there's no guarantee
320 289 # that we can drain the request responsibly. The WSGI
321 290 # specification only says that servers *should* ensure the
322 291 # input stream doesn't overrun the actual request. So there's
323 292 # no guarantee that reading until EOF won't corrupt the stream
324 293 # state.
325 294 if not isinstance(self.req.bodyfh, util.cappedreader):
326 295 close = True
327 296 else:
328 297 # We /could/ only drain certain HTTP response codes. But 200
329 298 # and non-200 wire protocol responses both require draining.
330 299 # Since we have a capped reader in place for all situations
331 300 # where we drain, it is safe to read from that stream. We'll
332 301 # either do a drain or no-op if we're already at EOF.
333 302 drain = True
334 303
335 304 if close:
336 305 self.headers.append((r'Connection', r'Close'))
337 306
338 307 if drain:
339 308 assert isinstance(self.req.bodyfh, util.cappedreader)
340 309 while True:
341 310 chunk = self.req.bodyfh.read(32768)
342 311 if not chunk:
343 312 break
344 313
345 314 self.server_write = self._start_response(
346 315 pycompat.sysstr(status), self.headers)
347 316 self._start_response = None
348 317 self.headers = []
349 318 if body is not None:
350 319 self.write(body)
351 320 self.server_write = None
352 321
353 322 def write(self, thing):
354 323 if thing:
355 324 try:
356 325 self.server_write(thing)
357 326 except socket.error as inst:
358 327 if inst[0] != errno.ECONNRESET:
359 328 raise
360 329
361 330 def flush(self):
362 331 return None
363 332
364 333 def wsgiapplication(app_maker):
365 334 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
366 335 can and should now be used as a WSGI application.'''
367 336 application = app_maker()
368 337 def run_wsgi(env, respond):
369 338 return application(env, respond)
370 339 return run_wsgi
@@ -1,58 +1,58 b''
1 1 #require serve
2 2
3 3 Test raw style of hgweb
4 4
5 5 $ hg init test
6 6 $ cd test
7 7 $ mkdir sub
8 8 $ cat >'sub/some text%.txt' <<ENDSOME
9 9 > This is just some random text
10 10 > that will go inside the file and take a few lines.
11 11 > It is very boring to read, but computers don't
12 12 > care about things like that.
13 13 > ENDSOME
14 14 $ hg add 'sub/some text%.txt'
15 15 $ hg commit -d "1 0" -m "Just some text"
16 16
17 17 $ hg serve -p $HGPORT -A access.log -E error.log -d --pid-file=hg.pid
18 18
19 19 $ cat hg.pid >> $DAEMON_PIDS
20 $ (get-with-headers.py localhost:$HGPORT '?f=bf0ff59095c9;file=sub/some%20text%25.txt;style=raw' content-type content-length content-disposition) >getoutput.txt
20 $ (get-with-headers.py localhost:$HGPORT 'raw-file/bf0ff59095c9/sub/some%20text%25.txt' content-type content-length content-disposition) >getoutput.txt
21 21
22 22 $ killdaemons.py hg.pid
23 23
24 24 $ cat getoutput.txt
25 25 200 Script output follows
26 26 content-type: application/binary
27 27 content-length: 157
28 28 content-disposition: inline; filename="some text%.txt"
29 29
30 30 This is just some random text
31 31 that will go inside the file and take a few lines.
32 32 It is very boring to read, but computers don't
33 33 care about things like that.
34 34 $ cat access.log error.log
35 $LOCALIP - - [*] "GET /?f=bf0ff59095c9;file=sub/some%20text%25.txt;style=raw HTTP/1.1" 200 - (glob)
35 $LOCALIP - - [$LOGDATE$] "GET /raw-file/bf0ff59095c9/sub/some%20text%25.txt HTTP/1.1" 200 - (glob)
36 36
37 37 $ rm access.log error.log
38 38 $ hg serve -p $HGPORT -A access.log -E error.log -d --pid-file=hg.pid \
39 39 > --config web.guessmime=True
40 40
41 41 $ cat hg.pid >> $DAEMON_PIDS
42 $ (get-with-headers.py localhost:$HGPORT '?f=bf0ff59095c9;file=sub/some%20text%25.txt;style=raw' content-type content-length content-disposition) >getoutput.txt
42 $ (get-with-headers.py localhost:$HGPORT 'raw-file/bf0ff59095c9/sub/some%20text%25.txt' content-type content-length content-disposition) >getoutput.txt
43 43 $ killdaemons.py hg.pid
44 44
45 45 $ cat getoutput.txt
46 46 200 Script output follows
47 47 content-type: text/plain; charset="ascii"
48 48 content-length: 157
49 49 content-disposition: inline; filename="some text%.txt"
50 50
51 51 This is just some random text
52 52 that will go inside the file and take a few lines.
53 53 It is very boring to read, but computers don't
54 54 care about things like that.
55 55 $ cat access.log error.log
56 $LOCALIP - - [*] "GET /?f=bf0ff59095c9;file=sub/some%20text%25.txt;style=raw HTTP/1.1" 200 - (glob)
56 $LOCALIP - - [$LOGDATE$] "GET /raw-file/bf0ff59095c9/sub/some%20text%25.txt HTTP/1.1" 200 - (glob)
57 57
58 58 $ cd ..
General Comments 0
You need to be logged in to leave comments. Login now