##// END OF EJS Templates
hgweb: use a multidict for holding query string parameters...
Gregory Szorc -
r36878:ec0af9c5 default
parent child Browse files
Show More
@@ -1,463 +1,539 b''
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import socket
13 13 import wsgiref.headers as wsgiheaders
14 14 #import wsgiref.validate
15 15
16 16 from .common import (
17 17 ErrorResponse,
18 18 HTTP_NOT_MODIFIED,
19 19 statusmessage,
20 20 )
21 21
22 22 from ..thirdparty import (
23 23 attr,
24 24 )
25 25 from .. import (
26 26 error,
27 27 pycompat,
28 28 util,
29 29 )
30 30
31 class multidict(object):
32 """A dict like object that can store multiple values for a key.
33
34 Used to store parsed request parameters.
35
36 This is inspired by WebOb's class of the same name.
37 """
38 def __init__(self):
39 # Stores (key, value) 2-tuples. This isn't the most efficient. But we
40 # don't rely on parameters that much, so it shouldn't be a perf issue.
41 # we can always add dict for fast lookups.
42 self._items = []
43
44 def __getitem__(self, key):
45 """Returns the last set value for a key."""
46 for k, v in reversed(self._items):
47 if k == key:
48 return v
49
50 raise KeyError(key)
51
52 def __setitem__(self, key, value):
53 """Replace a values for a key with a new value."""
54 try:
55 del self[key]
56 except KeyError:
57 pass
58
59 self._items.append((key, value))
60
61 def __delitem__(self, key):
62 """Delete all values for a key."""
63 oldlen = len(self._items)
64
65 self._items[:] = [(k, v) for k, v in self._items if k != key]
66
67 if oldlen == len(self._items):
68 raise KeyError(key)
69
70 def __contains__(self, key):
71 return any(k == key for k, v in self._items)
72
73 def __len__(self):
74 return len(self._items)
75
76 def get(self, key, default=None):
77 try:
78 return self.__getitem__(key)
79 except KeyError:
80 return default
81
82 def add(self, key, value):
83 """Add a new value for a key. Does not replace existing values."""
84 self._items.append((key, value))
85
86 def getall(self, key):
87 """Obtains all values for a key."""
88 return [v for k, v in self._items if k == key]
89
90 def getone(self, key):
91 """Obtain a single value for a key.
92
93 Raises KeyError if key not defined or it has multiple values set.
94 """
95 vals = self.getall(key)
96
97 if not vals:
98 raise KeyError(key)
99
100 if len(vals) > 1:
101 raise KeyError('multiple values for %r' % key)
102
103 return vals[0]
104
105 def asdictoflists(self):
106 d = {}
107 for k, v in self._items:
108 if k in d:
109 d[k].append(v)
110 else:
111 d[k] = [v]
112
113 return d
114
31 115 @attr.s(frozen=True)
32 116 class parsedrequest(object):
33 117 """Represents a parsed WSGI request.
34 118
35 119 Contains both parsed parameters as well as a handle on the input stream.
36 120 """
37 121
38 122 # Request method.
39 123 method = attr.ib()
40 124 # Full URL for this request.
41 125 url = attr.ib()
42 126 # URL without any path components. Just <proto>://<host><port>.
43 127 baseurl = attr.ib()
44 128 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
45 129 # of HTTP: Host header for hostname. This is likely what clients used.
46 130 advertisedurl = attr.ib()
47 131 advertisedbaseurl = attr.ib()
48 132 # WSGI application path.
49 133 apppath = attr.ib()
50 134 # List of path parts to be used for dispatch.
51 135 dispatchparts = attr.ib()
52 136 # URL path component (no query string) used for dispatch.
53 137 dispatchpath = attr.ib()
54 138 # Whether there is a path component to this request. This can be true
55 139 # when ``dispatchpath`` is empty due to REPO_NAME muckery.
56 140 havepathinfo = attr.ib()
57 141 # Raw query string (part after "?" in URL).
58 142 querystring = attr.ib()
59 # List of 2-tuples of query string arguments.
60 querystringlist = attr.ib()
61 # Dict of query string arguments. Values are lists with at least 1 item.
62 querystringdict = attr.ib()
143 # multidict of query string parameters.
144 qsparams = attr.ib()
63 145 # wsgiref.headers.Headers instance. Operates like a dict with case
64 146 # insensitive keys.
65 147 headers = attr.ib()
66 148 # Request body input stream.
67 149 bodyfh = attr.ib()
68 150
69 151 def parserequestfromenv(env, bodyfh):
70 152 """Parse URL components from environment variables.
71 153
72 154 WSGI defines request attributes via environment variables. This function
73 155 parses the environment variables into a data structure.
74 156 """
75 157 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
76 158
77 159 # We first validate that the incoming object conforms with the WSGI spec.
78 160 # We only want to be dealing with spec-conforming WSGI implementations.
79 161 # TODO enable this once we fix internal violations.
80 162 #wsgiref.validate.check_environ(env)
81 163
82 164 # PEP-0333 states that environment keys and values are native strings
83 165 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
84 166 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
85 167 # in Mercurial, so mass convert string keys and values to bytes.
86 168 if pycompat.ispy3:
87 169 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
88 170 env = {k: v.encode('latin-1') if isinstance(v, str) else v
89 171 for k, v in env.iteritems()}
90 172
91 173 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
92 174 # the environment variables.
93 175 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
94 176 # how URLs are reconstructed.
95 177 fullurl = env['wsgi.url_scheme'] + '://'
96 178 advertisedfullurl = fullurl
97 179
98 180 def addport(s):
99 181 if env['wsgi.url_scheme'] == 'https':
100 182 if env['SERVER_PORT'] != '443':
101 183 s += ':' + env['SERVER_PORT']
102 184 else:
103 185 if env['SERVER_PORT'] != '80':
104 186 s += ':' + env['SERVER_PORT']
105 187
106 188 return s
107 189
108 190 if env.get('HTTP_HOST'):
109 191 fullurl += env['HTTP_HOST']
110 192 else:
111 193 fullurl += env['SERVER_NAME']
112 194 fullurl = addport(fullurl)
113 195
114 196 advertisedfullurl += env['SERVER_NAME']
115 197 advertisedfullurl = addport(advertisedfullurl)
116 198
117 199 baseurl = fullurl
118 200 advertisedbaseurl = advertisedfullurl
119 201
120 202 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
121 203 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
122 204 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
123 205 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
124 206
125 207 if env.get('QUERY_STRING'):
126 208 fullurl += '?' + env['QUERY_STRING']
127 209 advertisedfullurl += '?' + env['QUERY_STRING']
128 210
129 211 # When dispatching requests, we look at the URL components (PATH_INFO
130 212 # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
131 213 # has the concept of "virtual" repositories. This is defined via REPO_NAME.
132 214 # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
133 215 # root. We also exclude its path components from PATH_INFO when resolving
134 216 # the dispatch path.
135 217
136 218 apppath = env['SCRIPT_NAME']
137 219
138 220 if env.get('REPO_NAME'):
139 221 if not apppath.endswith('/'):
140 222 apppath += '/'
141 223
142 224 apppath += env.get('REPO_NAME')
143 225
144 226 if 'PATH_INFO' in env:
145 227 dispatchparts = env['PATH_INFO'].strip('/').split('/')
146 228
147 229 # Strip out repo parts.
148 230 repoparts = env.get('REPO_NAME', '').split('/')
149 231 if dispatchparts[:len(repoparts)] == repoparts:
150 232 dispatchparts = dispatchparts[len(repoparts):]
151 233 else:
152 234 dispatchparts = []
153 235
154 236 dispatchpath = '/'.join(dispatchparts)
155 237
156 238 querystring = env.get('QUERY_STRING', '')
157 239
158 240 # We store as a list so we have ordering information. We also store as
159 241 # a dict to facilitate fast lookup.
160 querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)
161
162 querystringdict = {}
163 for k, v in querystringlist:
164 if k in querystringdict:
165 querystringdict[k].append(v)
166 else:
167 querystringdict[k] = [v]
242 qsparams = multidict()
243 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
244 qsparams.add(k, v)
168 245
169 246 # HTTP_* keys contain HTTP request headers. The Headers structure should
170 247 # perform case normalization for us. We just rewrite underscore to dash
171 248 # so keys match what likely went over the wire.
172 249 headers = []
173 250 for k, v in env.iteritems():
174 251 if k.startswith('HTTP_'):
175 252 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
176 253
177 254 headers = wsgiheaders.Headers(headers)
178 255
179 256 # This is kind of a lie because the HTTP header wasn't explicitly
180 257 # sent. But for all intents and purposes it should be OK to lie about
181 258 # this, since a consumer will either either value to determine how many
182 259 # bytes are available to read.
183 260 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
184 261 headers['Content-Length'] = env['CONTENT_LENGTH']
185 262
186 263 # TODO do this once we remove wsgirequest.inp, otherwise we could have
187 264 # multiple readers from the underlying input stream.
188 265 #bodyfh = env['wsgi.input']
189 266 #if 'Content-Length' in headers:
190 267 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
191 268
192 269 return parsedrequest(method=env['REQUEST_METHOD'],
193 270 url=fullurl, baseurl=baseurl,
194 271 advertisedurl=advertisedfullurl,
195 272 advertisedbaseurl=advertisedbaseurl,
196 273 apppath=apppath,
197 274 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
198 275 havepathinfo='PATH_INFO' in env,
199 276 querystring=querystring,
200 querystringlist=querystringlist,
201 querystringdict=querystringdict,
277 qsparams=qsparams,
202 278 headers=headers,
203 279 bodyfh=bodyfh)
204 280
205 281 class wsgiresponse(object):
206 282 """Represents a response to a WSGI request.
207 283
208 284 A response consists of a status line, headers, and a body.
209 285
210 286 Consumers must populate the ``status`` and ``headers`` fields and
211 287 make a call to a ``setbody*()`` method before the response can be
212 288 issued.
213 289
214 290 When it is time to start sending the response over the wire,
215 291 ``sendresponse()`` is called. It handles emitting the header portion
216 292 of the response message. It then yields chunks of body data to be
217 293 written to the peer. Typically, the WSGI application itself calls
218 294 and returns the value from ``sendresponse()``.
219 295 """
220 296
221 297 def __init__(self, req, startresponse):
222 298 """Create an empty response tied to a specific request.
223 299
224 300 ``req`` is a ``parsedrequest``. ``startresponse`` is the
225 301 ``start_response`` function passed to the WSGI application.
226 302 """
227 303 self._req = req
228 304 self._startresponse = startresponse
229 305
230 306 self.status = None
231 307 self.headers = wsgiheaders.Headers([])
232 308
233 309 self._bodybytes = None
234 310 self._bodygen = None
235 311 self._started = False
236 312
237 313 def setbodybytes(self, b):
238 314 """Define the response body as static bytes."""
239 315 if self._bodybytes is not None or self._bodygen is not None:
240 316 raise error.ProgrammingError('cannot define body multiple times')
241 317
242 318 self._bodybytes = b
243 319 self.headers['Content-Length'] = '%d' % len(b)
244 320
245 321 def setbodygen(self, gen):
246 322 """Define the response body as a generator of bytes."""
247 323 if self._bodybytes is not None or self._bodygen is not None:
248 324 raise error.ProgrammingError('cannot define body multiple times')
249 325
250 326 self._bodygen = gen
251 327
252 328 def sendresponse(self):
253 329 """Send the generated response to the client.
254 330
255 331 Before this is called, ``status`` must be set and one of
256 332 ``setbodybytes()`` or ``setbodygen()`` must be called.
257 333
258 334 Calling this method multiple times is not allowed.
259 335 """
260 336 if self._started:
261 337 raise error.ProgrammingError('sendresponse() called multiple times')
262 338
263 339 self._started = True
264 340
265 341 if not self.status:
266 342 raise error.ProgrammingError('status line not defined')
267 343
268 344 if self._bodybytes is None and self._bodygen is None:
269 345 raise error.ProgrammingError('response body not defined')
270 346
271 347 # Various HTTP clients (notably httplib) won't read the HTTP response
272 348 # until the HTTP request has been sent in full. If servers (us) send a
273 349 # response before the HTTP request has been fully sent, the connection
274 350 # may deadlock because neither end is reading.
275 351 #
276 352 # We work around this by "draining" the request data before
277 353 # sending any response in some conditions.
278 354 drain = False
279 355 close = False
280 356
281 357 # If the client sent Expect: 100-continue, we assume it is smart enough
282 358 # to deal with the server sending a response before reading the request.
283 359 # (httplib doesn't do this.)
284 360 if self._req.headers.get('Expect', '').lower() == '100-continue':
285 361 pass
286 362 # Only tend to request methods that have bodies. Strictly speaking,
287 363 # we should sniff for a body. But this is fine for our existing
288 364 # WSGI applications.
289 365 elif self._req.method not in ('POST', 'PUT'):
290 366 pass
291 367 else:
292 368 # If we don't know how much data to read, there's no guarantee
293 369 # that we can drain the request responsibly. The WSGI
294 370 # specification only says that servers *should* ensure the
295 371 # input stream doesn't overrun the actual request. So there's
296 372 # no guarantee that reading until EOF won't corrupt the stream
297 373 # state.
298 374 if not isinstance(self._req.bodyfh, util.cappedreader):
299 375 close = True
300 376 else:
301 377 # We /could/ only drain certain HTTP response codes. But 200 and
302 378 # non-200 wire protocol responses both require draining. Since
303 379 # we have a capped reader in place for all situations where we
304 380 # drain, it is safe to read from that stream. We'll either do
305 381 # a drain or no-op if we're already at EOF.
306 382 drain = True
307 383
308 384 if close:
309 385 self.headers['Connection'] = 'Close'
310 386
311 387 if drain:
312 388 assert isinstance(self._req.bodyfh, util.cappedreader)
313 389 while True:
314 390 chunk = self._req.bodyfh.read(32768)
315 391 if not chunk:
316 392 break
317 393
318 394 self._startresponse(pycompat.sysstr(self.status), self.headers.items())
319 395 if self._bodybytes:
320 396 yield self._bodybytes
321 397 elif self._bodygen:
322 398 for chunk in self._bodygen:
323 399 yield chunk
324 400 else:
325 401 error.ProgrammingError('do not know how to send body')
326 402
327 403 class wsgirequest(object):
328 404 """Higher-level API for a WSGI request.
329 405
330 406 WSGI applications are invoked with 2 arguments. They are used to
331 407 instantiate instances of this class, which provides higher-level APIs
332 408 for obtaining request parameters, writing HTTP output, etc.
333 409 """
334 410 def __init__(self, wsgienv, start_response):
335 411 version = wsgienv[r'wsgi.version']
336 412 if (version < (1, 0)) or (version >= (2, 0)):
337 413 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
338 414 % version)
339 415
340 416 inp = wsgienv[r'wsgi.input']
341 417
342 418 if r'HTTP_CONTENT_LENGTH' in wsgienv:
343 419 inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
344 420 elif r'CONTENT_LENGTH' in wsgienv:
345 421 inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
346 422
347 423 self.err = wsgienv[r'wsgi.errors']
348 424 self.threaded = wsgienv[r'wsgi.multithread']
349 425 self.multiprocess = wsgienv[r'wsgi.multiprocess']
350 426 self.run_once = wsgienv[r'wsgi.run_once']
351 427 self.env = wsgienv
352 428 self.req = parserequestfromenv(wsgienv, inp)
353 self.form = self.req.querystringdict
429 self.form = self.req.qsparams.asdictoflists()
354 430 self.res = wsgiresponse(self.req, start_response)
355 431 self._start_response = start_response
356 432 self.server_write = None
357 433 self.headers = []
358 434
359 435 def respond(self, status, type, filename=None, body=None):
360 436 if not isinstance(type, str):
361 437 type = pycompat.sysstr(type)
362 438 if self._start_response is not None:
363 439 self.headers.append((r'Content-Type', type))
364 440 if filename:
365 441 filename = (filename.rpartition('/')[-1]
366 442 .replace('\\', '\\\\').replace('"', '\\"'))
367 443 self.headers.append(('Content-Disposition',
368 444 'inline; filename="%s"' % filename))
369 445 if body is not None:
370 446 self.headers.append((r'Content-Length', str(len(body))))
371 447
372 448 for k, v in self.headers:
373 449 if not isinstance(v, str):
374 450 raise TypeError('header value must be string: %r' % (v,))
375 451
376 452 if isinstance(status, ErrorResponse):
377 453 self.headers.extend(status.headers)
378 454 if status.code == HTTP_NOT_MODIFIED:
379 455 # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
380 456 # it MUST NOT include any headers other than these and no
381 457 # body
382 458 self.headers = [(k, v) for (k, v) in self.headers if
383 459 k in ('Date', 'ETag', 'Expires',
384 460 'Cache-Control', 'Vary')]
385 461 status = statusmessage(status.code, pycompat.bytestr(status))
386 462 elif status == 200:
387 463 status = '200 Script output follows'
388 464 elif isinstance(status, int):
389 465 status = statusmessage(status)
390 466
391 467 # Various HTTP clients (notably httplib) won't read the HTTP
392 468 # response until the HTTP request has been sent in full. If servers
393 469 # (us) send a response before the HTTP request has been fully sent,
394 470 # the connection may deadlock because neither end is reading.
395 471 #
396 472 # We work around this by "draining" the request data before
397 473 # sending any response in some conditions.
398 474 drain = False
399 475 close = False
400 476
401 477 # If the client sent Expect: 100-continue, we assume it is smart
402 478 # enough to deal with the server sending a response before reading
403 479 # the request. (httplib doesn't do this.)
404 480 if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
405 481 pass
406 482 # Only tend to request methods that have bodies. Strictly speaking,
407 483 # we should sniff for a body. But this is fine for our existing
408 484 # WSGI applications.
409 485 elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
410 486 pass
411 487 else:
412 488 # If we don't know how much data to read, there's no guarantee
413 489 # that we can drain the request responsibly. The WSGI
414 490 # specification only says that servers *should* ensure the
415 491 # input stream doesn't overrun the actual request. So there's
416 492 # no guarantee that reading until EOF won't corrupt the stream
417 493 # state.
418 494 if not isinstance(self.req.bodyfh, util.cappedreader):
419 495 close = True
420 496 else:
421 497 # We /could/ only drain certain HTTP response codes. But 200
422 498 # and non-200 wire protocol responses both require draining.
423 499 # Since we have a capped reader in place for all situations
424 500 # where we drain, it is safe to read from that stream. We'll
425 501 # either do a drain or no-op if we're already at EOF.
426 502 drain = True
427 503
428 504 if close:
429 505 self.headers.append((r'Connection', r'Close'))
430 506
431 507 if drain:
432 508 assert isinstance(self.req.bodyfh, util.cappedreader)
433 509 while True:
434 510 chunk = self.req.bodyfh.read(32768)
435 511 if not chunk:
436 512 break
437 513
438 514 self.server_write = self._start_response(
439 515 pycompat.sysstr(status), self.headers)
440 516 self._start_response = None
441 517 self.headers = []
442 518 if body is not None:
443 519 self.write(body)
444 520 self.server_write = None
445 521
446 522 def write(self, thing):
447 523 if thing:
448 524 try:
449 525 self.server_write(thing)
450 526 except socket.error as inst:
451 527 if inst[0] != errno.ECONNRESET:
452 528 raise
453 529
454 530 def flush(self):
455 531 return None
456 532
457 533 def wsgiapplication(app_maker):
458 534 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
459 535 can and should now be used as a WSGI application.'''
460 536 application = app_maker()
461 537 def run_wsgi(env, respond):
462 538 return application(env, respond)
463 539 return run_wsgi
@@ -1,655 +1,655 b''
1 1 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
2 2 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 3 #
4 4 # This software may be used and distributed according to the terms of the
5 5 # GNU General Public License version 2 or any later version.
6 6
7 7 from __future__ import absolute_import
8 8
9 9 import contextlib
10 10 import struct
11 11 import sys
12 12 import threading
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 encoding,
17 17 error,
18 18 hook,
19 19 pycompat,
20 20 util,
21 21 wireproto,
22 22 wireprototypes,
23 23 )
24 24
25 25 stringio = util.stringio
26 26
27 27 urlerr = util.urlerr
28 28 urlreq = util.urlreq
29 29
30 30 HTTP_OK = 200
31 31
32 32 HGTYPE = 'application/mercurial-0.1'
33 33 HGTYPE2 = 'application/mercurial-0.2'
34 34 HGERRTYPE = 'application/hg-error'
35 35
36 36 SSHV1 = wireprototypes.SSHV1
37 37 SSHV2 = wireprototypes.SSHV2
38 38
39 39 def decodevaluefromheaders(req, headerprefix):
40 40 """Decode a long value from multiple HTTP request headers.
41 41
42 42 Returns the value as a bytes, not a str.
43 43 """
44 44 chunks = []
45 45 i = 1
46 46 while True:
47 47 v = req.headers.get(b'%s-%d' % (headerprefix, i))
48 48 if v is None:
49 49 break
50 50 chunks.append(pycompat.bytesurl(v))
51 51 i += 1
52 52
53 53 return ''.join(chunks)
54 54
55 55 class httpv1protocolhandler(wireprototypes.baseprotocolhandler):
56 56 def __init__(self, wsgireq, req, ui, checkperm):
57 57 self._wsgireq = wsgireq
58 58 self._req = req
59 59 self._ui = ui
60 60 self._checkperm = checkperm
61 61
62 62 @property
63 63 def name(self):
64 64 return 'http-v1'
65 65
66 66 def getargs(self, args):
67 67 knownargs = self._args()
68 68 data = {}
69 69 keys = args.split()
70 70 for k in keys:
71 71 if k == '*':
72 72 star = {}
73 73 for key in knownargs.keys():
74 74 if key != 'cmd' and key not in keys:
75 75 star[key] = knownargs[key][0]
76 76 data['*'] = star
77 77 else:
78 78 data[k] = knownargs[k][0]
79 79 return [data[k] for k in keys]
80 80
81 81 def _args(self):
82 args = util.rapply(pycompat.bytesurl, self._wsgireq.form.copy())
82 args = self._req.qsparams.asdictoflists()
83 83 postlen = int(self._req.headers.get(b'X-HgArgs-Post', 0))
84 84 if postlen:
85 85 args.update(urlreq.parseqs(
86 86 self._req.bodyfh.read(postlen), keep_blank_values=True))
87 87 return args
88 88
89 89 argvalue = decodevaluefromheaders(self._req, b'X-HgArg')
90 90 args.update(urlreq.parseqs(argvalue, keep_blank_values=True))
91 91 return args
92 92
93 93 def forwardpayload(self, fp):
94 94 # Existing clients *always* send Content-Length.
95 95 length = int(self._req.headers[b'Content-Length'])
96 96
97 97 # If httppostargs is used, we need to read Content-Length
98 98 # minus the amount that was consumed by args.
99 99 length -= int(self._req.headers.get(b'X-HgArgs-Post', 0))
100 100 for s in util.filechunkiter(self._req.bodyfh, limit=length):
101 101 fp.write(s)
102 102
103 103 @contextlib.contextmanager
104 104 def mayberedirectstdio(self):
105 105 oldout = self._ui.fout
106 106 olderr = self._ui.ferr
107 107
108 108 out = util.stringio()
109 109
110 110 try:
111 111 self._ui.fout = out
112 112 self._ui.ferr = out
113 113 yield out
114 114 finally:
115 115 self._ui.fout = oldout
116 116 self._ui.ferr = olderr
117 117
118 118 def client(self):
119 119 return 'remote:%s:%s:%s' % (
120 120 self._wsgireq.env.get('wsgi.url_scheme') or 'http',
121 121 urlreq.quote(self._wsgireq.env.get('REMOTE_HOST', '')),
122 122 urlreq.quote(self._wsgireq.env.get('REMOTE_USER', '')))
123 123
124 124 def addcapabilities(self, repo, caps):
125 125 caps.append('httpheader=%d' %
126 126 repo.ui.configint('server', 'maxhttpheaderlen'))
127 127 if repo.ui.configbool('experimental', 'httppostargs'):
128 128 caps.append('httppostargs')
129 129
130 130 # FUTURE advertise 0.2rx once support is implemented
131 131 # FUTURE advertise minrx and mintx after consulting config option
132 132 caps.append('httpmediatype=0.1rx,0.1tx,0.2tx')
133 133
134 134 compengines = wireproto.supportedcompengines(repo.ui, util.SERVERROLE)
135 135 if compengines:
136 136 comptypes = ','.join(urlreq.quote(e.wireprotosupport().name)
137 137 for e in compengines)
138 138 caps.append('compression=%s' % comptypes)
139 139
140 140 return caps
141 141
142 142 def checkperm(self, perm):
143 143 return self._checkperm(perm)
144 144
145 145 # This method exists mostly so that extensions like remotefilelog can
146 146 # disable a kludgey legacy method only over http. As of early 2018,
147 147 # there are no other known users, so with any luck we can discard this
148 148 # hook if remotefilelog becomes a first-party extension.
149 149 def iscmd(cmd):
150 150 return cmd in wireproto.commands
151 151
152 152 def handlewsgirequest(rctx, wsgireq, req, res, checkperm):
153 153 """Possibly process a wire protocol request.
154 154
155 155 If the current request is a wire protocol request, the request is
156 156 processed by this function.
157 157
158 158 ``wsgireq`` is a ``wsgirequest`` instance.
159 159 ``req`` is a ``parsedrequest`` instance.
160 160 ``res`` is a ``wsgiresponse`` instance.
161 161
162 162 Returns a bool indicating if the request was serviced. If set, the caller
163 163 should stop processing the request, as a response has already been issued.
164 164 """
165 165 # Avoid cycle involving hg module.
166 166 from .hgweb import common as hgwebcommon
167 167
168 168 repo = rctx.repo
169 169
170 170 # HTTP version 1 wire protocol requests are denoted by a "cmd" query
171 171 # string parameter. If it isn't present, this isn't a wire protocol
172 172 # request.
173 if 'cmd' not in req.querystringdict:
173 if 'cmd' not in req.qsparams:
174 174 return False
175 175
176 cmd = req.querystringdict['cmd'][0]
176 cmd = req.qsparams['cmd']
177 177
178 178 # The "cmd" request parameter is used by both the wire protocol and hgweb.
179 179 # While not all wire protocol commands are available for all transports,
180 180 # if we see a "cmd" value that resembles a known wire protocol command, we
181 181 # route it to a protocol handler. This is better than routing possible
182 182 # wire protocol requests to hgweb because it prevents hgweb from using
183 183 # known wire protocol commands and it is less confusing for machine
184 184 # clients.
185 185 if not iscmd(cmd):
186 186 return False
187 187
188 188 # The "cmd" query string argument is only valid on the root path of the
189 189 # repo. e.g. ``/?cmd=foo``, ``/repo?cmd=foo``. URL paths within the repo
190 190 # like ``/blah?cmd=foo`` are not allowed. So don't recognize the request
191 191 # in this case. We send an HTTP 404 for backwards compatibility reasons.
192 192 if req.dispatchpath:
193 193 res.status = hgwebcommon.statusmessage(404)
194 194 res.headers['Content-Type'] = HGTYPE
195 195 # TODO This is not a good response to issue for this request. This
196 196 # is mostly for BC for now.
197 197 res.setbodybytes('0\n%s\n' % b'Not Found')
198 198 return True
199 199
200 200 proto = httpv1protocolhandler(wsgireq, req, repo.ui,
201 201 lambda perm: checkperm(rctx, wsgireq, perm))
202 202
203 203 # The permissions checker should be the only thing that can raise an
204 204 # ErrorResponse. It is kind of a layer violation to catch an hgweb
205 205 # exception here. So consider refactoring into a exception type that
206 206 # is associated with the wire protocol.
207 207 try:
208 208 _callhttp(repo, wsgireq, req, res, proto, cmd)
209 209 except hgwebcommon.ErrorResponse as e:
210 210 for k, v in e.headers:
211 211 res.headers[k] = v
212 212 res.status = hgwebcommon.statusmessage(e.code, pycompat.bytestr(e))
213 213 # TODO This response body assumes the failed command was
214 214 # "unbundle." That assumption is not always valid.
215 215 res.setbodybytes('0\n%s\n' % pycompat.bytestr(e))
216 216
217 217 return True
218 218
219 219 def _httpresponsetype(ui, req, prefer_uncompressed):
220 220 """Determine the appropriate response type and compression settings.
221 221
222 222 Returns a tuple of (mediatype, compengine, engineopts).
223 223 """
224 224 # Determine the response media type and compression engine based
225 225 # on the request parameters.
226 226 protocaps = decodevaluefromheaders(req, 'X-HgProto').split(' ')
227 227
228 228 if '0.2' in protocaps:
229 229 # All clients are expected to support uncompressed data.
230 230 if prefer_uncompressed:
231 231 return HGTYPE2, util._noopengine(), {}
232 232
233 233 # Default as defined by wire protocol spec.
234 234 compformats = ['zlib', 'none']
235 235 for cap in protocaps:
236 236 if cap.startswith('comp='):
237 237 compformats = cap[5:].split(',')
238 238 break
239 239
240 240 # Now find an agreed upon compression format.
241 241 for engine in wireproto.supportedcompengines(ui, util.SERVERROLE):
242 242 if engine.wireprotosupport().name in compformats:
243 243 opts = {}
244 244 level = ui.configint('server', '%slevel' % engine.name())
245 245 if level is not None:
246 246 opts['level'] = level
247 247
248 248 return HGTYPE2, engine, opts
249 249
250 250 # No mutually supported compression format. Fall back to the
251 251 # legacy protocol.
252 252
253 253 # Don't allow untrusted settings because disabling compression or
254 254 # setting a very high compression level could lead to flooding
255 255 # the server's network or CPU.
256 256 opts = {'level': ui.configint('server', 'zliblevel')}
257 257 return HGTYPE, util.compengines['zlib'], opts
258 258
259 259 def _callhttp(repo, wsgireq, req, res, proto, cmd):
260 260 # Avoid cycle involving hg module.
261 261 from .hgweb import common as hgwebcommon
262 262
263 263 def genversion2(gen, engine, engineopts):
264 264 # application/mercurial-0.2 always sends a payload header
265 265 # identifying the compression engine.
266 266 name = engine.wireprotosupport().name
267 267 assert 0 < len(name) < 256
268 268 yield struct.pack('B', len(name))
269 269 yield name
270 270
271 271 for chunk in gen:
272 272 yield chunk
273 273
274 274 def setresponse(code, contenttype, bodybytes=None, bodygen=None):
275 275 if code == HTTP_OK:
276 276 res.status = '200 Script output follows'
277 277 else:
278 278 res.status = hgwebcommon.statusmessage(code)
279 279
280 280 res.headers['Content-Type'] = contenttype
281 281
282 282 if bodybytes is not None:
283 283 res.setbodybytes(bodybytes)
284 284 if bodygen is not None:
285 285 res.setbodygen(bodygen)
286 286
287 287 if not wireproto.commands.commandavailable(cmd, proto):
288 288 setresponse(HTTP_OK, HGERRTYPE,
289 289 _('requested wire protocol command is not available over '
290 290 'HTTP'))
291 291 return
292 292
293 293 proto.checkperm(wireproto.commands[cmd].permission)
294 294
295 295 rsp = wireproto.dispatch(repo, proto, cmd)
296 296
297 297 if isinstance(rsp, bytes):
298 298 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
299 299 elif isinstance(rsp, wireprototypes.bytesresponse):
300 300 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp.data)
301 301 elif isinstance(rsp, wireprototypes.streamreslegacy):
302 302 setresponse(HTTP_OK, HGTYPE, bodygen=rsp.gen)
303 303 elif isinstance(rsp, wireprototypes.streamres):
304 304 gen = rsp.gen
305 305
306 306 # This code for compression should not be streamres specific. It
307 307 # is here because we only compress streamres at the moment.
308 308 mediatype, engine, engineopts = _httpresponsetype(
309 309 repo.ui, req, rsp.prefer_uncompressed)
310 310 gen = engine.compressstream(gen, engineopts)
311 311
312 312 if mediatype == HGTYPE2:
313 313 gen = genversion2(gen, engine, engineopts)
314 314
315 315 setresponse(HTTP_OK, mediatype, bodygen=gen)
316 316 elif isinstance(rsp, wireprototypes.pushres):
317 317 rsp = '%d\n%s' % (rsp.res, rsp.output)
318 318 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
319 319 elif isinstance(rsp, wireprototypes.pusherr):
320 320 rsp = '0\n%s\n' % rsp.res
321 321 res.drain = True
322 322 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
323 323 elif isinstance(rsp, wireprototypes.ooberror):
324 324 setresponse(HTTP_OK, HGERRTYPE, bodybytes=rsp.message)
325 325 else:
326 326 raise error.ProgrammingError('hgweb.protocol internal failure', rsp)
327 327
328 328 def _sshv1respondbytes(fout, value):
329 329 """Send a bytes response for protocol version 1."""
330 330 fout.write('%d\n' % len(value))
331 331 fout.write(value)
332 332 fout.flush()
333 333
334 334 def _sshv1respondstream(fout, source):
335 335 write = fout.write
336 336 for chunk in source.gen:
337 337 write(chunk)
338 338 fout.flush()
339 339
340 340 def _sshv1respondooberror(fout, ferr, rsp):
341 341 ferr.write(b'%s\n-\n' % rsp)
342 342 ferr.flush()
343 343 fout.write(b'\n')
344 344 fout.flush()
345 345
346 346 class sshv1protocolhandler(wireprototypes.baseprotocolhandler):
347 347 """Handler for requests services via version 1 of SSH protocol."""
348 348 def __init__(self, ui, fin, fout):
349 349 self._ui = ui
350 350 self._fin = fin
351 351 self._fout = fout
352 352
353 353 @property
354 354 def name(self):
355 355 return wireprototypes.SSHV1
356 356
357 357 def getargs(self, args):
358 358 data = {}
359 359 keys = args.split()
360 360 for n in xrange(len(keys)):
361 361 argline = self._fin.readline()[:-1]
362 362 arg, l = argline.split()
363 363 if arg not in keys:
364 364 raise error.Abort(_("unexpected parameter %r") % arg)
365 365 if arg == '*':
366 366 star = {}
367 367 for k in xrange(int(l)):
368 368 argline = self._fin.readline()[:-1]
369 369 arg, l = argline.split()
370 370 val = self._fin.read(int(l))
371 371 star[arg] = val
372 372 data['*'] = star
373 373 else:
374 374 val = self._fin.read(int(l))
375 375 data[arg] = val
376 376 return [data[k] for k in keys]
377 377
378 378 def forwardpayload(self, fpout):
379 379 # We initially send an empty response. This tells the client it is
380 380 # OK to start sending data. If a client sees any other response, it
381 381 # interprets it as an error.
382 382 _sshv1respondbytes(self._fout, b'')
383 383
384 384 # The file is in the form:
385 385 #
386 386 # <chunk size>\n<chunk>
387 387 # ...
388 388 # 0\n
389 389 count = int(self._fin.readline())
390 390 while count:
391 391 fpout.write(self._fin.read(count))
392 392 count = int(self._fin.readline())
393 393
394 394 @contextlib.contextmanager
395 395 def mayberedirectstdio(self):
396 396 yield None
397 397
398 398 def client(self):
399 399 client = encoding.environ.get('SSH_CLIENT', '').split(' ', 1)[0]
400 400 return 'remote:ssh:' + client
401 401
402 402 def addcapabilities(self, repo, caps):
403 403 return caps
404 404
405 405 def checkperm(self, perm):
406 406 pass
407 407
408 408 class sshv2protocolhandler(sshv1protocolhandler):
409 409 """Protocol handler for version 2 of the SSH protocol."""
410 410
411 411 @property
412 412 def name(self):
413 413 return wireprototypes.SSHV2
414 414
415 415 def _runsshserver(ui, repo, fin, fout, ev):
416 416 # This function operates like a state machine of sorts. The following
417 417 # states are defined:
418 418 #
419 419 # protov1-serving
420 420 # Server is in protocol version 1 serving mode. Commands arrive on
421 421 # new lines. These commands are processed in this state, one command
422 422 # after the other.
423 423 #
424 424 # protov2-serving
425 425 # Server is in protocol version 2 serving mode.
426 426 #
427 427 # upgrade-initial
428 428 # The server is going to process an upgrade request.
429 429 #
430 430 # upgrade-v2-filter-legacy-handshake
431 431 # The protocol is being upgraded to version 2. The server is expecting
432 432 # the legacy handshake from version 1.
433 433 #
434 434 # upgrade-v2-finish
435 435 # The upgrade to version 2 of the protocol is imminent.
436 436 #
437 437 # shutdown
438 438 # The server is shutting down, possibly in reaction to a client event.
439 439 #
440 440 # And here are their transitions:
441 441 #
442 442 # protov1-serving -> shutdown
443 443 # When server receives an empty request or encounters another
444 444 # error.
445 445 #
446 446 # protov1-serving -> upgrade-initial
447 447 # An upgrade request line was seen.
448 448 #
449 449 # upgrade-initial -> upgrade-v2-filter-legacy-handshake
450 450 # Upgrade to version 2 in progress. Server is expecting to
451 451 # process a legacy handshake.
452 452 #
453 453 # upgrade-v2-filter-legacy-handshake -> shutdown
454 454 # Client did not fulfill upgrade handshake requirements.
455 455 #
456 456 # upgrade-v2-filter-legacy-handshake -> upgrade-v2-finish
457 457 # Client fulfilled version 2 upgrade requirements. Finishing that
458 458 # upgrade.
459 459 #
460 460 # upgrade-v2-finish -> protov2-serving
461 461 # Protocol upgrade to version 2 complete. Server can now speak protocol
462 462 # version 2.
463 463 #
464 464 # protov2-serving -> protov1-serving
465 465 # Ths happens by default since protocol version 2 is the same as
466 466 # version 1 except for the handshake.
467 467
468 468 state = 'protov1-serving'
469 469 proto = sshv1protocolhandler(ui, fin, fout)
470 470 protoswitched = False
471 471
472 472 while not ev.is_set():
473 473 if state == 'protov1-serving':
474 474 # Commands are issued on new lines.
475 475 request = fin.readline()[:-1]
476 476
477 477 # Empty lines signal to terminate the connection.
478 478 if not request:
479 479 state = 'shutdown'
480 480 continue
481 481
482 482 # It looks like a protocol upgrade request. Transition state to
483 483 # handle it.
484 484 if request.startswith(b'upgrade '):
485 485 if protoswitched:
486 486 _sshv1respondooberror(fout, ui.ferr,
487 487 b'cannot upgrade protocols multiple '
488 488 b'times')
489 489 state = 'shutdown'
490 490 continue
491 491
492 492 state = 'upgrade-initial'
493 493 continue
494 494
495 495 available = wireproto.commands.commandavailable(request, proto)
496 496
497 497 # This command isn't available. Send an empty response and go
498 498 # back to waiting for a new command.
499 499 if not available:
500 500 _sshv1respondbytes(fout, b'')
501 501 continue
502 502
503 503 rsp = wireproto.dispatch(repo, proto, request)
504 504
505 505 if isinstance(rsp, bytes):
506 506 _sshv1respondbytes(fout, rsp)
507 507 elif isinstance(rsp, wireprototypes.bytesresponse):
508 508 _sshv1respondbytes(fout, rsp.data)
509 509 elif isinstance(rsp, wireprototypes.streamres):
510 510 _sshv1respondstream(fout, rsp)
511 511 elif isinstance(rsp, wireprototypes.streamreslegacy):
512 512 _sshv1respondstream(fout, rsp)
513 513 elif isinstance(rsp, wireprototypes.pushres):
514 514 _sshv1respondbytes(fout, b'')
515 515 _sshv1respondbytes(fout, b'%d' % rsp.res)
516 516 elif isinstance(rsp, wireprototypes.pusherr):
517 517 _sshv1respondbytes(fout, rsp.res)
518 518 elif isinstance(rsp, wireprototypes.ooberror):
519 519 _sshv1respondooberror(fout, ui.ferr, rsp.message)
520 520 else:
521 521 raise error.ProgrammingError('unhandled response type from '
522 522 'wire protocol command: %s' % rsp)
523 523
524 524 # For now, protocol version 2 serving just goes back to version 1.
525 525 elif state == 'protov2-serving':
526 526 state = 'protov1-serving'
527 527 continue
528 528
529 529 elif state == 'upgrade-initial':
530 530 # We should never transition into this state if we've switched
531 531 # protocols.
532 532 assert not protoswitched
533 533 assert proto.name == wireprototypes.SSHV1
534 534
535 535 # Expected: upgrade <token> <capabilities>
536 536 # If we get something else, the request is malformed. It could be
537 537 # from a future client that has altered the upgrade line content.
538 538 # We treat this as an unknown command.
539 539 try:
540 540 token, caps = request.split(b' ')[1:]
541 541 except ValueError:
542 542 _sshv1respondbytes(fout, b'')
543 543 state = 'protov1-serving'
544 544 continue
545 545
546 546 # Send empty response if we don't support upgrading protocols.
547 547 if not ui.configbool('experimental', 'sshserver.support-v2'):
548 548 _sshv1respondbytes(fout, b'')
549 549 state = 'protov1-serving'
550 550 continue
551 551
552 552 try:
553 553 caps = urlreq.parseqs(caps)
554 554 except ValueError:
555 555 _sshv1respondbytes(fout, b'')
556 556 state = 'protov1-serving'
557 557 continue
558 558
559 559 # We don't see an upgrade request to protocol version 2. Ignore
560 560 # the upgrade request.
561 561 wantedprotos = caps.get(b'proto', [b''])[0]
562 562 if SSHV2 not in wantedprotos:
563 563 _sshv1respondbytes(fout, b'')
564 564 state = 'protov1-serving'
565 565 continue
566 566
567 567 # It looks like we can honor this upgrade request to protocol 2.
568 568 # Filter the rest of the handshake protocol request lines.
569 569 state = 'upgrade-v2-filter-legacy-handshake'
570 570 continue
571 571
572 572 elif state == 'upgrade-v2-filter-legacy-handshake':
573 573 # Client should have sent legacy handshake after an ``upgrade``
574 574 # request. Expected lines:
575 575 #
576 576 # hello
577 577 # between
578 578 # pairs 81
579 579 # 0000...-0000...
580 580
581 581 ok = True
582 582 for line in (b'hello', b'between', b'pairs 81'):
583 583 request = fin.readline()[:-1]
584 584
585 585 if request != line:
586 586 _sshv1respondooberror(fout, ui.ferr,
587 587 b'malformed handshake protocol: '
588 588 b'missing %s' % line)
589 589 ok = False
590 590 state = 'shutdown'
591 591 break
592 592
593 593 if not ok:
594 594 continue
595 595
596 596 request = fin.read(81)
597 597 if request != b'%s-%s' % (b'0' * 40, b'0' * 40):
598 598 _sshv1respondooberror(fout, ui.ferr,
599 599 b'malformed handshake protocol: '
600 600 b'missing between argument value')
601 601 state = 'shutdown'
602 602 continue
603 603
604 604 state = 'upgrade-v2-finish'
605 605 continue
606 606
607 607 elif state == 'upgrade-v2-finish':
608 608 # Send the upgrade response.
609 609 fout.write(b'upgraded %s %s\n' % (token, SSHV2))
610 610 servercaps = wireproto.capabilities(repo, proto)
611 611 rsp = b'capabilities: %s' % servercaps.data
612 612 fout.write(b'%d\n%s\n' % (len(rsp), rsp))
613 613 fout.flush()
614 614
615 615 proto = sshv2protocolhandler(ui, fin, fout)
616 616 protoswitched = True
617 617
618 618 state = 'protov2-serving'
619 619 continue
620 620
621 621 elif state == 'shutdown':
622 622 break
623 623
624 624 else:
625 625 raise error.ProgrammingError('unhandled ssh server state: %s' %
626 626 state)
627 627
628 628 class sshserver(object):
629 629 def __init__(self, ui, repo, logfh=None):
630 630 self._ui = ui
631 631 self._repo = repo
632 632 self._fin = ui.fin
633 633 self._fout = ui.fout
634 634
635 635 # Log write I/O to stdout and stderr if configured.
636 636 if logfh:
637 637 self._fout = util.makeloggingfileobject(
638 638 logfh, self._fout, 'o', logdata=True)
639 639 ui.ferr = util.makeloggingfileobject(
640 640 logfh, ui.ferr, 'e', logdata=True)
641 641
642 642 hook.redirect(True)
643 643 ui.fout = repo.ui.fout = ui.ferr
644 644
645 645 # Prevent insertion/deletion of CRs
646 646 util.setbinary(self._fin)
647 647 util.setbinary(self._fout)
648 648
649 649 def serve_forever(self):
650 650 self.serveuntil(threading.Event())
651 651 sys.exit(0)
652 652
653 653 def serveuntil(self, ev):
654 654 """Serve until a threading.Event is set."""
655 655 _runsshserver(self._ui, self._repo, self._fin, self._fout, ev)
General Comments 0
You need to be logged in to leave comments. Login now