##// END OF EJS Templates
hgweb: expose URL scheme and REMOTE_* attributes...
Gregory Szorc -
r36883:a755fd3b default
parent child Browse files
Show More
@@ -1,538 +1,547
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import socket
13 13 import wsgiref.headers as wsgiheaders
14 14 #import wsgiref.validate
15 15
16 16 from .common import (
17 17 ErrorResponse,
18 18 HTTP_NOT_MODIFIED,
19 19 statusmessage,
20 20 )
21 21
22 22 from ..thirdparty import (
23 23 attr,
24 24 )
25 25 from .. import (
26 26 error,
27 27 pycompat,
28 28 util,
29 29 )
30 30
31 31 class multidict(object):
32 32 """A dict like object that can store multiple values for a key.
33 33
34 34 Used to store parsed request parameters.
35 35
36 36 This is inspired by WebOb's class of the same name.
37 37 """
38 38 def __init__(self):
39 39 # Stores (key, value) 2-tuples. This isn't the most efficient. But we
40 40 # don't rely on parameters that much, so it shouldn't be a perf issue.
41 41 # we can always add dict for fast lookups.
42 42 self._items = []
43 43
44 44 def __getitem__(self, key):
45 45 """Returns the last set value for a key."""
46 46 for k, v in reversed(self._items):
47 47 if k == key:
48 48 return v
49 49
50 50 raise KeyError(key)
51 51
52 52 def __setitem__(self, key, value):
53 53 """Replace a values for a key with a new value."""
54 54 try:
55 55 del self[key]
56 56 except KeyError:
57 57 pass
58 58
59 59 self._items.append((key, value))
60 60
61 61 def __delitem__(self, key):
62 62 """Delete all values for a key."""
63 63 oldlen = len(self._items)
64 64
65 65 self._items[:] = [(k, v) for k, v in self._items if k != key]
66 66
67 67 if oldlen == len(self._items):
68 68 raise KeyError(key)
69 69
70 70 def __contains__(self, key):
71 71 return any(k == key for k, v in self._items)
72 72
73 73 def __len__(self):
74 74 return len(self._items)
75 75
76 76 def get(self, key, default=None):
77 77 try:
78 78 return self.__getitem__(key)
79 79 except KeyError:
80 80 return default
81 81
82 82 def add(self, key, value):
83 83 """Add a new value for a key. Does not replace existing values."""
84 84 self._items.append((key, value))
85 85
86 86 def getall(self, key):
87 87 """Obtains all values for a key."""
88 88 return [v for k, v in self._items if k == key]
89 89
90 90 def getone(self, key):
91 91 """Obtain a single value for a key.
92 92
93 93 Raises KeyError if key not defined or it has multiple values set.
94 94 """
95 95 vals = self.getall(key)
96 96
97 97 if not vals:
98 98 raise KeyError(key)
99 99
100 100 if len(vals) > 1:
101 101 raise KeyError('multiple values for %r' % key)
102 102
103 103 return vals[0]
104 104
105 105 def asdictoflists(self):
106 106 d = {}
107 107 for k, v in self._items:
108 108 if k in d:
109 109 d[k].append(v)
110 110 else:
111 111 d[k] = [v]
112 112
113 113 return d
114 114
115 115 @attr.s(frozen=True)
116 116 class parsedrequest(object):
117 117 """Represents a parsed WSGI request.
118 118
119 119 Contains both parsed parameters as well as a handle on the input stream.
120 120 """
121 121
122 122 # Request method.
123 123 method = attr.ib()
124 124 # Full URL for this request.
125 125 url = attr.ib()
126 126 # URL without any path components. Just <proto>://<host><port>.
127 127 baseurl = attr.ib()
128 128 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
129 129 # of HTTP: Host header for hostname. This is likely what clients used.
130 130 advertisedurl = attr.ib()
131 131 advertisedbaseurl = attr.ib()
132 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
133 urlscheme = attr.ib()
134 # Value of REMOTE_USER, if set, or None.
135 remoteuser = attr.ib()
136 # Value of REMOTE_HOST, if set, or None.
137 remotehost = attr.ib()
132 138 # WSGI application path.
133 139 apppath = attr.ib()
134 140 # List of path parts to be used for dispatch.
135 141 dispatchparts = attr.ib()
136 142 # URL path component (no query string) used for dispatch.
137 143 dispatchpath = attr.ib()
138 144 # Whether there is a path component to this request. This can be true
139 145 # when ``dispatchpath`` is empty due to REPO_NAME muckery.
140 146 havepathinfo = attr.ib()
141 147 # Raw query string (part after "?" in URL).
142 148 querystring = attr.ib()
143 149 # multidict of query string parameters.
144 150 qsparams = attr.ib()
145 151 # wsgiref.headers.Headers instance. Operates like a dict with case
146 152 # insensitive keys.
147 153 headers = attr.ib()
148 154 # Request body input stream.
149 155 bodyfh = attr.ib()
150 156
151 157 def parserequestfromenv(env, bodyfh):
152 158 """Parse URL components from environment variables.
153 159
154 160 WSGI defines request attributes via environment variables. This function
155 161 parses the environment variables into a data structure.
156 162 """
157 163 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
158 164
159 165 # We first validate that the incoming object conforms with the WSGI spec.
160 166 # We only want to be dealing with spec-conforming WSGI implementations.
161 167 # TODO enable this once we fix internal violations.
162 168 #wsgiref.validate.check_environ(env)
163 169
164 170 # PEP-0333 states that environment keys and values are native strings
165 171 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
166 172 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
167 173 # in Mercurial, so mass convert string keys and values to bytes.
168 174 if pycompat.ispy3:
169 175 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
170 176 env = {k: v.encode('latin-1') if isinstance(v, str) else v
171 177 for k, v in env.iteritems()}
172 178
173 179 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
174 180 # the environment variables.
175 181 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
176 182 # how URLs are reconstructed.
177 183 fullurl = env['wsgi.url_scheme'] + '://'
178 184 advertisedfullurl = fullurl
179 185
180 186 def addport(s):
181 187 if env['wsgi.url_scheme'] == 'https':
182 188 if env['SERVER_PORT'] != '443':
183 189 s += ':' + env['SERVER_PORT']
184 190 else:
185 191 if env['SERVER_PORT'] != '80':
186 192 s += ':' + env['SERVER_PORT']
187 193
188 194 return s
189 195
190 196 if env.get('HTTP_HOST'):
191 197 fullurl += env['HTTP_HOST']
192 198 else:
193 199 fullurl += env['SERVER_NAME']
194 200 fullurl = addport(fullurl)
195 201
196 202 advertisedfullurl += env['SERVER_NAME']
197 203 advertisedfullurl = addport(advertisedfullurl)
198 204
199 205 baseurl = fullurl
200 206 advertisedbaseurl = advertisedfullurl
201 207
202 208 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
203 209 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
204 210 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
205 211 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
206 212
207 213 if env.get('QUERY_STRING'):
208 214 fullurl += '?' + env['QUERY_STRING']
209 215 advertisedfullurl += '?' + env['QUERY_STRING']
210 216
211 217 # When dispatching requests, we look at the URL components (PATH_INFO
212 218 # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
213 219 # has the concept of "virtual" repositories. This is defined via REPO_NAME.
214 220 # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
215 221 # root. We also exclude its path components from PATH_INFO when resolving
216 222 # the dispatch path.
217 223
218 224 apppath = env['SCRIPT_NAME']
219 225
220 226 if env.get('REPO_NAME'):
221 227 if not apppath.endswith('/'):
222 228 apppath += '/'
223 229
224 230 apppath += env.get('REPO_NAME')
225 231
226 232 if 'PATH_INFO' in env:
227 233 dispatchparts = env['PATH_INFO'].strip('/').split('/')
228 234
229 235 # Strip out repo parts.
230 236 repoparts = env.get('REPO_NAME', '').split('/')
231 237 if dispatchparts[:len(repoparts)] == repoparts:
232 238 dispatchparts = dispatchparts[len(repoparts):]
233 239 else:
234 240 dispatchparts = []
235 241
236 242 dispatchpath = '/'.join(dispatchparts)
237 243
238 244 querystring = env.get('QUERY_STRING', '')
239 245
240 246 # We store as a list so we have ordering information. We also store as
241 247 # a dict to facilitate fast lookup.
242 248 qsparams = multidict()
243 249 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
244 250 qsparams.add(k, v)
245 251
246 252 # HTTP_* keys contain HTTP request headers. The Headers structure should
247 253 # perform case normalization for us. We just rewrite underscore to dash
248 254 # so keys match what likely went over the wire.
249 255 headers = []
250 256 for k, v in env.iteritems():
251 257 if k.startswith('HTTP_'):
252 258 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
253 259
254 260 headers = wsgiheaders.Headers(headers)
255 261
256 262 # This is kind of a lie because the HTTP header wasn't explicitly
257 263 # sent. But for all intents and purposes it should be OK to lie about
258 264 # this, since a consumer will either either value to determine how many
259 265 # bytes are available to read.
260 266 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
261 267 headers['Content-Length'] = env['CONTENT_LENGTH']
262 268
263 269 # TODO do this once we remove wsgirequest.inp, otherwise we could have
264 270 # multiple readers from the underlying input stream.
265 271 #bodyfh = env['wsgi.input']
266 272 #if 'Content-Length' in headers:
267 273 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
268 274
269 275 return parsedrequest(method=env['REQUEST_METHOD'],
270 276 url=fullurl, baseurl=baseurl,
271 277 advertisedurl=advertisedfullurl,
272 278 advertisedbaseurl=advertisedbaseurl,
279 urlscheme=env['wsgi.url_scheme'],
280 remoteuser=env.get('REMOTE_USER'),
281 remotehost=env.get('REMOTE_HOST'),
273 282 apppath=apppath,
274 283 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
275 284 havepathinfo='PATH_INFO' in env,
276 285 querystring=querystring,
277 286 qsparams=qsparams,
278 287 headers=headers,
279 288 bodyfh=bodyfh)
280 289
281 290 class wsgiresponse(object):
282 291 """Represents a response to a WSGI request.
283 292
284 293 A response consists of a status line, headers, and a body.
285 294
286 295 Consumers must populate the ``status`` and ``headers`` fields and
287 296 make a call to a ``setbody*()`` method before the response can be
288 297 issued.
289 298
290 299 When it is time to start sending the response over the wire,
291 300 ``sendresponse()`` is called. It handles emitting the header portion
292 301 of the response message. It then yields chunks of body data to be
293 302 written to the peer. Typically, the WSGI application itself calls
294 303 and returns the value from ``sendresponse()``.
295 304 """
296 305
297 306 def __init__(self, req, startresponse):
298 307 """Create an empty response tied to a specific request.
299 308
300 309 ``req`` is a ``parsedrequest``. ``startresponse`` is the
301 310 ``start_response`` function passed to the WSGI application.
302 311 """
303 312 self._req = req
304 313 self._startresponse = startresponse
305 314
306 315 self.status = None
307 316 self.headers = wsgiheaders.Headers([])
308 317
309 318 self._bodybytes = None
310 319 self._bodygen = None
311 320 self._started = False
312 321
313 322 def setbodybytes(self, b):
314 323 """Define the response body as static bytes."""
315 324 if self._bodybytes is not None or self._bodygen is not None:
316 325 raise error.ProgrammingError('cannot define body multiple times')
317 326
318 327 self._bodybytes = b
319 328 self.headers['Content-Length'] = '%d' % len(b)
320 329
321 330 def setbodygen(self, gen):
322 331 """Define the response body as a generator of bytes."""
323 332 if self._bodybytes is not None or self._bodygen is not None:
324 333 raise error.ProgrammingError('cannot define body multiple times')
325 334
326 335 self._bodygen = gen
327 336
328 337 def sendresponse(self):
329 338 """Send the generated response to the client.
330 339
331 340 Before this is called, ``status`` must be set and one of
332 341 ``setbodybytes()`` or ``setbodygen()`` must be called.
333 342
334 343 Calling this method multiple times is not allowed.
335 344 """
336 345 if self._started:
337 346 raise error.ProgrammingError('sendresponse() called multiple times')
338 347
339 348 self._started = True
340 349
341 350 if not self.status:
342 351 raise error.ProgrammingError('status line not defined')
343 352
344 353 if self._bodybytes is None and self._bodygen is None:
345 354 raise error.ProgrammingError('response body not defined')
346 355
347 356 # Various HTTP clients (notably httplib) won't read the HTTP response
348 357 # until the HTTP request has been sent in full. If servers (us) send a
349 358 # response before the HTTP request has been fully sent, the connection
350 359 # may deadlock because neither end is reading.
351 360 #
352 361 # We work around this by "draining" the request data before
353 362 # sending any response in some conditions.
354 363 drain = False
355 364 close = False
356 365
357 366 # If the client sent Expect: 100-continue, we assume it is smart enough
358 367 # to deal with the server sending a response before reading the request.
359 368 # (httplib doesn't do this.)
360 369 if self._req.headers.get('Expect', '').lower() == '100-continue':
361 370 pass
362 371 # Only tend to request methods that have bodies. Strictly speaking,
363 372 # we should sniff for a body. But this is fine for our existing
364 373 # WSGI applications.
365 374 elif self._req.method not in ('POST', 'PUT'):
366 375 pass
367 376 else:
368 377 # If we don't know how much data to read, there's no guarantee
369 378 # that we can drain the request responsibly. The WSGI
370 379 # specification only says that servers *should* ensure the
371 380 # input stream doesn't overrun the actual request. So there's
372 381 # no guarantee that reading until EOF won't corrupt the stream
373 382 # state.
374 383 if not isinstance(self._req.bodyfh, util.cappedreader):
375 384 close = True
376 385 else:
377 386 # We /could/ only drain certain HTTP response codes. But 200 and
378 387 # non-200 wire protocol responses both require draining. Since
379 388 # we have a capped reader in place for all situations where we
380 389 # drain, it is safe to read from that stream. We'll either do
381 390 # a drain or no-op if we're already at EOF.
382 391 drain = True
383 392
384 393 if close:
385 394 self.headers['Connection'] = 'Close'
386 395
387 396 if drain:
388 397 assert isinstance(self._req.bodyfh, util.cappedreader)
389 398 while True:
390 399 chunk = self._req.bodyfh.read(32768)
391 400 if not chunk:
392 401 break
393 402
394 403 self._startresponse(pycompat.sysstr(self.status), self.headers.items())
395 404 if self._bodybytes:
396 405 yield self._bodybytes
397 406 elif self._bodygen:
398 407 for chunk in self._bodygen:
399 408 yield chunk
400 409 else:
401 410 error.ProgrammingError('do not know how to send body')
402 411
403 412 class wsgirequest(object):
404 413 """Higher-level API for a WSGI request.
405 414
406 415 WSGI applications are invoked with 2 arguments. They are used to
407 416 instantiate instances of this class, which provides higher-level APIs
408 417 for obtaining request parameters, writing HTTP output, etc.
409 418 """
410 419 def __init__(self, wsgienv, start_response):
411 420 version = wsgienv[r'wsgi.version']
412 421 if (version < (1, 0)) or (version >= (2, 0)):
413 422 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
414 423 % version)
415 424
416 425 inp = wsgienv[r'wsgi.input']
417 426
418 427 if r'HTTP_CONTENT_LENGTH' in wsgienv:
419 428 inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
420 429 elif r'CONTENT_LENGTH' in wsgienv:
421 430 inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
422 431
423 432 self.err = wsgienv[r'wsgi.errors']
424 433 self.threaded = wsgienv[r'wsgi.multithread']
425 434 self.multiprocess = wsgienv[r'wsgi.multiprocess']
426 435 self.run_once = wsgienv[r'wsgi.run_once']
427 436 self.env = wsgienv
428 437 self.req = parserequestfromenv(wsgienv, inp)
429 438 self.res = wsgiresponse(self.req, start_response)
430 439 self._start_response = start_response
431 440 self.server_write = None
432 441 self.headers = []
433 442
434 443 def respond(self, status, type, filename=None, body=None):
435 444 if not isinstance(type, str):
436 445 type = pycompat.sysstr(type)
437 446 if self._start_response is not None:
438 447 self.headers.append((r'Content-Type', type))
439 448 if filename:
440 449 filename = (filename.rpartition('/')[-1]
441 450 .replace('\\', '\\\\').replace('"', '\\"'))
442 451 self.headers.append(('Content-Disposition',
443 452 'inline; filename="%s"' % filename))
444 453 if body is not None:
445 454 self.headers.append((r'Content-Length', str(len(body))))
446 455
447 456 for k, v in self.headers:
448 457 if not isinstance(v, str):
449 458 raise TypeError('header value must be string: %r' % (v,))
450 459
451 460 if isinstance(status, ErrorResponse):
452 461 self.headers.extend(status.headers)
453 462 if status.code == HTTP_NOT_MODIFIED:
454 463 # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
455 464 # it MUST NOT include any headers other than these and no
456 465 # body
457 466 self.headers = [(k, v) for (k, v) in self.headers if
458 467 k in ('Date', 'ETag', 'Expires',
459 468 'Cache-Control', 'Vary')]
460 469 status = statusmessage(status.code, pycompat.bytestr(status))
461 470 elif status == 200:
462 471 status = '200 Script output follows'
463 472 elif isinstance(status, int):
464 473 status = statusmessage(status)
465 474
466 475 # Various HTTP clients (notably httplib) won't read the HTTP
467 476 # response until the HTTP request has been sent in full. If servers
468 477 # (us) send a response before the HTTP request has been fully sent,
469 478 # the connection may deadlock because neither end is reading.
470 479 #
471 480 # We work around this by "draining" the request data before
472 481 # sending any response in some conditions.
473 482 drain = False
474 483 close = False
475 484
476 485 # If the client sent Expect: 100-continue, we assume it is smart
477 486 # enough to deal with the server sending a response before reading
478 487 # the request. (httplib doesn't do this.)
479 488 if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
480 489 pass
481 490 # Only tend to request methods that have bodies. Strictly speaking,
482 491 # we should sniff for a body. But this is fine for our existing
483 492 # WSGI applications.
484 493 elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
485 494 pass
486 495 else:
487 496 # If we don't know how much data to read, there's no guarantee
488 497 # that we can drain the request responsibly. The WSGI
489 498 # specification only says that servers *should* ensure the
490 499 # input stream doesn't overrun the actual request. So there's
491 500 # no guarantee that reading until EOF won't corrupt the stream
492 501 # state.
493 502 if not isinstance(self.req.bodyfh, util.cappedreader):
494 503 close = True
495 504 else:
496 505 # We /could/ only drain certain HTTP response codes. But 200
497 506 # and non-200 wire protocol responses both require draining.
498 507 # Since we have a capped reader in place for all situations
499 508 # where we drain, it is safe to read from that stream. We'll
500 509 # either do a drain or no-op if we're already at EOF.
501 510 drain = True
502 511
503 512 if close:
504 513 self.headers.append((r'Connection', r'Close'))
505 514
506 515 if drain:
507 516 assert isinstance(self.req.bodyfh, util.cappedreader)
508 517 while True:
509 518 chunk = self.req.bodyfh.read(32768)
510 519 if not chunk:
511 520 break
512 521
513 522 self.server_write = self._start_response(
514 523 pycompat.sysstr(status), self.headers)
515 524 self._start_response = None
516 525 self.headers = []
517 526 if body is not None:
518 527 self.write(body)
519 528 self.server_write = None
520 529
521 530 def write(self, thing):
522 531 if thing:
523 532 try:
524 533 self.server_write(thing)
525 534 except socket.error as inst:
526 535 if inst[0] != errno.ECONNRESET:
527 536 raise
528 537
529 538 def flush(self):
530 539 return None
531 540
532 541 def wsgiapplication(app_maker):
533 542 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
534 543 can and should now be used as a WSGI application.'''
535 544 application = app_maker()
536 545 def run_wsgi(env, respond):
537 546 return application(env, respond)
538 547 return run_wsgi
@@ -1,655 +1,654
1 1 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
2 2 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 3 #
4 4 # This software may be used and distributed according to the terms of the
5 5 # GNU General Public License version 2 or any later version.
6 6
7 7 from __future__ import absolute_import
8 8
9 9 import contextlib
10 10 import struct
11 11 import sys
12 12 import threading
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 encoding,
17 17 error,
18 18 hook,
19 19 pycompat,
20 20 util,
21 21 wireproto,
22 22 wireprototypes,
23 23 )
24 24
25 25 stringio = util.stringio
26 26
27 27 urlerr = util.urlerr
28 28 urlreq = util.urlreq
29 29
30 30 HTTP_OK = 200
31 31
32 32 HGTYPE = 'application/mercurial-0.1'
33 33 HGTYPE2 = 'application/mercurial-0.2'
34 34 HGERRTYPE = 'application/hg-error'
35 35
36 36 SSHV1 = wireprototypes.SSHV1
37 37 SSHV2 = wireprototypes.SSHV2
38 38
39 39 def decodevaluefromheaders(req, headerprefix):
40 40 """Decode a long value from multiple HTTP request headers.
41 41
42 42 Returns the value as a bytes, not a str.
43 43 """
44 44 chunks = []
45 45 i = 1
46 46 while True:
47 47 v = req.headers.get(b'%s-%d' % (headerprefix, i))
48 48 if v is None:
49 49 break
50 50 chunks.append(pycompat.bytesurl(v))
51 51 i += 1
52 52
53 53 return ''.join(chunks)
54 54
55 55 class httpv1protocolhandler(wireprototypes.baseprotocolhandler):
56 def __init__(self, wsgireq, req, ui, checkperm):
57 self._wsgireq = wsgireq
56 def __init__(self, req, ui, checkperm):
58 57 self._req = req
59 58 self._ui = ui
60 59 self._checkperm = checkperm
61 60
62 61 @property
63 62 def name(self):
64 63 return 'http-v1'
65 64
66 65 def getargs(self, args):
67 66 knownargs = self._args()
68 67 data = {}
69 68 keys = args.split()
70 69 for k in keys:
71 70 if k == '*':
72 71 star = {}
73 72 for key in knownargs.keys():
74 73 if key != 'cmd' and key not in keys:
75 74 star[key] = knownargs[key][0]
76 75 data['*'] = star
77 76 else:
78 77 data[k] = knownargs[k][0]
79 78 return [data[k] for k in keys]
80 79
81 80 def _args(self):
82 81 args = self._req.qsparams.asdictoflists()
83 82 postlen = int(self._req.headers.get(b'X-HgArgs-Post', 0))
84 83 if postlen:
85 84 args.update(urlreq.parseqs(
86 85 self._req.bodyfh.read(postlen), keep_blank_values=True))
87 86 return args
88 87
89 88 argvalue = decodevaluefromheaders(self._req, b'X-HgArg')
90 89 args.update(urlreq.parseqs(argvalue, keep_blank_values=True))
91 90 return args
92 91
93 92 def forwardpayload(self, fp):
94 93 # Existing clients *always* send Content-Length.
95 94 length = int(self._req.headers[b'Content-Length'])
96 95
97 96 # If httppostargs is used, we need to read Content-Length
98 97 # minus the amount that was consumed by args.
99 98 length -= int(self._req.headers.get(b'X-HgArgs-Post', 0))
100 99 for s in util.filechunkiter(self._req.bodyfh, limit=length):
101 100 fp.write(s)
102 101
103 102 @contextlib.contextmanager
104 103 def mayberedirectstdio(self):
105 104 oldout = self._ui.fout
106 105 olderr = self._ui.ferr
107 106
108 107 out = util.stringio()
109 108
110 109 try:
111 110 self._ui.fout = out
112 111 self._ui.ferr = out
113 112 yield out
114 113 finally:
115 114 self._ui.fout = oldout
116 115 self._ui.ferr = olderr
117 116
118 117 def client(self):
119 118 return 'remote:%s:%s:%s' % (
120 self._wsgireq.env.get('wsgi.url_scheme') or 'http',
121 urlreq.quote(self._wsgireq.env.get('REMOTE_HOST', '')),
122 urlreq.quote(self._wsgireq.env.get('REMOTE_USER', '')))
119 self._req.urlscheme,
120 urlreq.quote(self._req.remotehost or ''),
121 urlreq.quote(self._req.remoteuser or ''))
123 122
124 123 def addcapabilities(self, repo, caps):
125 124 caps.append('httpheader=%d' %
126 125 repo.ui.configint('server', 'maxhttpheaderlen'))
127 126 if repo.ui.configbool('experimental', 'httppostargs'):
128 127 caps.append('httppostargs')
129 128
130 129 # FUTURE advertise 0.2rx once support is implemented
131 130 # FUTURE advertise minrx and mintx after consulting config option
132 131 caps.append('httpmediatype=0.1rx,0.1tx,0.2tx')
133 132
134 133 compengines = wireproto.supportedcompengines(repo.ui, util.SERVERROLE)
135 134 if compengines:
136 135 comptypes = ','.join(urlreq.quote(e.wireprotosupport().name)
137 136 for e in compengines)
138 137 caps.append('compression=%s' % comptypes)
139 138
140 139 return caps
141 140
142 141 def checkperm(self, perm):
143 142 return self._checkperm(perm)
144 143
145 144 # This method exists mostly so that extensions like remotefilelog can
146 145 # disable a kludgey legacy method only over http. As of early 2018,
147 146 # there are no other known users, so with any luck we can discard this
148 147 # hook if remotefilelog becomes a first-party extension.
149 148 def iscmd(cmd):
150 149 return cmd in wireproto.commands
151 150
152 151 def handlewsgirequest(rctx, wsgireq, req, res, checkperm):
153 152 """Possibly process a wire protocol request.
154 153
155 154 If the current request is a wire protocol request, the request is
156 155 processed by this function.
157 156
158 157 ``wsgireq`` is a ``wsgirequest`` instance.
159 158 ``req`` is a ``parsedrequest`` instance.
160 159 ``res`` is a ``wsgiresponse`` instance.
161 160
162 161 Returns a bool indicating if the request was serviced. If set, the caller
163 162 should stop processing the request, as a response has already been issued.
164 163 """
165 164 # Avoid cycle involving hg module.
166 165 from .hgweb import common as hgwebcommon
167 166
168 167 repo = rctx.repo
169 168
170 169 # HTTP version 1 wire protocol requests are denoted by a "cmd" query
171 170 # string parameter. If it isn't present, this isn't a wire protocol
172 171 # request.
173 172 if 'cmd' not in req.qsparams:
174 173 return False
175 174
176 175 cmd = req.qsparams['cmd']
177 176
178 177 # The "cmd" request parameter is used by both the wire protocol and hgweb.
179 178 # While not all wire protocol commands are available for all transports,
180 179 # if we see a "cmd" value that resembles a known wire protocol command, we
181 180 # route it to a protocol handler. This is better than routing possible
182 181 # wire protocol requests to hgweb because it prevents hgweb from using
183 182 # known wire protocol commands and it is less confusing for machine
184 183 # clients.
185 184 if not iscmd(cmd):
186 185 return False
187 186
188 187 # The "cmd" query string argument is only valid on the root path of the
189 188 # repo. e.g. ``/?cmd=foo``, ``/repo?cmd=foo``. URL paths within the repo
190 189 # like ``/blah?cmd=foo`` are not allowed. So don't recognize the request
191 190 # in this case. We send an HTTP 404 for backwards compatibility reasons.
192 191 if req.dispatchpath:
193 192 res.status = hgwebcommon.statusmessage(404)
194 193 res.headers['Content-Type'] = HGTYPE
195 194 # TODO This is not a good response to issue for this request. This
196 195 # is mostly for BC for now.
197 196 res.setbodybytes('0\n%s\n' % b'Not Found')
198 197 return True
199 198
200 proto = httpv1protocolhandler(wsgireq, req, repo.ui,
199 proto = httpv1protocolhandler(req, repo.ui,
201 200 lambda perm: checkperm(rctx, wsgireq, perm))
202 201
203 202 # The permissions checker should be the only thing that can raise an
204 203 # ErrorResponse. It is kind of a layer violation to catch an hgweb
205 204 # exception here. So consider refactoring into a exception type that
206 205 # is associated with the wire protocol.
207 206 try:
208 _callhttp(repo, wsgireq, req, res, proto, cmd)
207 _callhttp(repo, req, res, proto, cmd)
209 208 except hgwebcommon.ErrorResponse as e:
210 209 for k, v in e.headers:
211 210 res.headers[k] = v
212 211 res.status = hgwebcommon.statusmessage(e.code, pycompat.bytestr(e))
213 212 # TODO This response body assumes the failed command was
214 213 # "unbundle." That assumption is not always valid.
215 214 res.setbodybytes('0\n%s\n' % pycompat.bytestr(e))
216 215
217 216 return True
218 217
219 218 def _httpresponsetype(ui, req, prefer_uncompressed):
220 219 """Determine the appropriate response type and compression settings.
221 220
222 221 Returns a tuple of (mediatype, compengine, engineopts).
223 222 """
224 223 # Determine the response media type and compression engine based
225 224 # on the request parameters.
226 225 protocaps = decodevaluefromheaders(req, 'X-HgProto').split(' ')
227 226
228 227 if '0.2' in protocaps:
229 228 # All clients are expected to support uncompressed data.
230 229 if prefer_uncompressed:
231 230 return HGTYPE2, util._noopengine(), {}
232 231
233 232 # Default as defined by wire protocol spec.
234 233 compformats = ['zlib', 'none']
235 234 for cap in protocaps:
236 235 if cap.startswith('comp='):
237 236 compformats = cap[5:].split(',')
238 237 break
239 238
240 239 # Now find an agreed upon compression format.
241 240 for engine in wireproto.supportedcompengines(ui, util.SERVERROLE):
242 241 if engine.wireprotosupport().name in compformats:
243 242 opts = {}
244 243 level = ui.configint('server', '%slevel' % engine.name())
245 244 if level is not None:
246 245 opts['level'] = level
247 246
248 247 return HGTYPE2, engine, opts
249 248
250 249 # No mutually supported compression format. Fall back to the
251 250 # legacy protocol.
252 251
253 252 # Don't allow untrusted settings because disabling compression or
254 253 # setting a very high compression level could lead to flooding
255 254 # the server's network or CPU.
256 255 opts = {'level': ui.configint('server', 'zliblevel')}
257 256 return HGTYPE, util.compengines['zlib'], opts
258 257
259 def _callhttp(repo, wsgireq, req, res, proto, cmd):
258 def _callhttp(repo, req, res, proto, cmd):
260 259 # Avoid cycle involving hg module.
261 260 from .hgweb import common as hgwebcommon
262 261
263 262 def genversion2(gen, engine, engineopts):
264 263 # application/mercurial-0.2 always sends a payload header
265 264 # identifying the compression engine.
266 265 name = engine.wireprotosupport().name
267 266 assert 0 < len(name) < 256
268 267 yield struct.pack('B', len(name))
269 268 yield name
270 269
271 270 for chunk in gen:
272 271 yield chunk
273 272
274 273 def setresponse(code, contenttype, bodybytes=None, bodygen=None):
275 274 if code == HTTP_OK:
276 275 res.status = '200 Script output follows'
277 276 else:
278 277 res.status = hgwebcommon.statusmessage(code)
279 278
280 279 res.headers['Content-Type'] = contenttype
281 280
282 281 if bodybytes is not None:
283 282 res.setbodybytes(bodybytes)
284 283 if bodygen is not None:
285 284 res.setbodygen(bodygen)
286 285
287 286 if not wireproto.commands.commandavailable(cmd, proto):
288 287 setresponse(HTTP_OK, HGERRTYPE,
289 288 _('requested wire protocol command is not available over '
290 289 'HTTP'))
291 290 return
292 291
293 292 proto.checkperm(wireproto.commands[cmd].permission)
294 293
295 294 rsp = wireproto.dispatch(repo, proto, cmd)
296 295
297 296 if isinstance(rsp, bytes):
298 297 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
299 298 elif isinstance(rsp, wireprototypes.bytesresponse):
300 299 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp.data)
301 300 elif isinstance(rsp, wireprototypes.streamreslegacy):
302 301 setresponse(HTTP_OK, HGTYPE, bodygen=rsp.gen)
303 302 elif isinstance(rsp, wireprototypes.streamres):
304 303 gen = rsp.gen
305 304
306 305 # This code for compression should not be streamres specific. It
307 306 # is here because we only compress streamres at the moment.
308 307 mediatype, engine, engineopts = _httpresponsetype(
309 308 repo.ui, req, rsp.prefer_uncompressed)
310 309 gen = engine.compressstream(gen, engineopts)
311 310
312 311 if mediatype == HGTYPE2:
313 312 gen = genversion2(gen, engine, engineopts)
314 313
315 314 setresponse(HTTP_OK, mediatype, bodygen=gen)
316 315 elif isinstance(rsp, wireprototypes.pushres):
317 316 rsp = '%d\n%s' % (rsp.res, rsp.output)
318 317 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
319 318 elif isinstance(rsp, wireprototypes.pusherr):
320 319 rsp = '0\n%s\n' % rsp.res
321 320 res.drain = True
322 321 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
323 322 elif isinstance(rsp, wireprototypes.ooberror):
324 323 setresponse(HTTP_OK, HGERRTYPE, bodybytes=rsp.message)
325 324 else:
326 325 raise error.ProgrammingError('hgweb.protocol internal failure', rsp)
327 326
328 327 def _sshv1respondbytes(fout, value):
329 328 """Send a bytes response for protocol version 1."""
330 329 fout.write('%d\n' % len(value))
331 330 fout.write(value)
332 331 fout.flush()
333 332
334 333 def _sshv1respondstream(fout, source):
335 334 write = fout.write
336 335 for chunk in source.gen:
337 336 write(chunk)
338 337 fout.flush()
339 338
340 339 def _sshv1respondooberror(fout, ferr, rsp):
341 340 ferr.write(b'%s\n-\n' % rsp)
342 341 ferr.flush()
343 342 fout.write(b'\n')
344 343 fout.flush()
345 344
346 345 class sshv1protocolhandler(wireprototypes.baseprotocolhandler):
347 346 """Handler for requests services via version 1 of SSH protocol."""
348 347 def __init__(self, ui, fin, fout):
349 348 self._ui = ui
350 349 self._fin = fin
351 350 self._fout = fout
352 351
353 352 @property
354 353 def name(self):
355 354 return wireprototypes.SSHV1
356 355
357 356 def getargs(self, args):
358 357 data = {}
359 358 keys = args.split()
360 359 for n in xrange(len(keys)):
361 360 argline = self._fin.readline()[:-1]
362 361 arg, l = argline.split()
363 362 if arg not in keys:
364 363 raise error.Abort(_("unexpected parameter %r") % arg)
365 364 if arg == '*':
366 365 star = {}
367 366 for k in xrange(int(l)):
368 367 argline = self._fin.readline()[:-1]
369 368 arg, l = argline.split()
370 369 val = self._fin.read(int(l))
371 370 star[arg] = val
372 371 data['*'] = star
373 372 else:
374 373 val = self._fin.read(int(l))
375 374 data[arg] = val
376 375 return [data[k] for k in keys]
377 376
378 377 def forwardpayload(self, fpout):
379 378 # We initially send an empty response. This tells the client it is
380 379 # OK to start sending data. If a client sees any other response, it
381 380 # interprets it as an error.
382 381 _sshv1respondbytes(self._fout, b'')
383 382
384 383 # The file is in the form:
385 384 #
386 385 # <chunk size>\n<chunk>
387 386 # ...
388 387 # 0\n
389 388 count = int(self._fin.readline())
390 389 while count:
391 390 fpout.write(self._fin.read(count))
392 391 count = int(self._fin.readline())
393 392
394 393 @contextlib.contextmanager
395 394 def mayberedirectstdio(self):
396 395 yield None
397 396
398 397 def client(self):
399 398 client = encoding.environ.get('SSH_CLIENT', '').split(' ', 1)[0]
400 399 return 'remote:ssh:' + client
401 400
402 401 def addcapabilities(self, repo, caps):
403 402 return caps
404 403
405 404 def checkperm(self, perm):
406 405 pass
407 406
408 407 class sshv2protocolhandler(sshv1protocolhandler):
409 408 """Protocol handler for version 2 of the SSH protocol."""
410 409
411 410 @property
412 411 def name(self):
413 412 return wireprototypes.SSHV2
414 413
415 414 def _runsshserver(ui, repo, fin, fout, ev):
416 415 # This function operates like a state machine of sorts. The following
417 416 # states are defined:
418 417 #
419 418 # protov1-serving
420 419 # Server is in protocol version 1 serving mode. Commands arrive on
421 420 # new lines. These commands are processed in this state, one command
422 421 # after the other.
423 422 #
424 423 # protov2-serving
425 424 # Server is in protocol version 2 serving mode.
426 425 #
427 426 # upgrade-initial
428 427 # The server is going to process an upgrade request.
429 428 #
430 429 # upgrade-v2-filter-legacy-handshake
431 430 # The protocol is being upgraded to version 2. The server is expecting
432 431 # the legacy handshake from version 1.
433 432 #
434 433 # upgrade-v2-finish
435 434 # The upgrade to version 2 of the protocol is imminent.
436 435 #
437 436 # shutdown
438 437 # The server is shutting down, possibly in reaction to a client event.
439 438 #
440 439 # And here are their transitions:
441 440 #
442 441 # protov1-serving -> shutdown
443 442 # When server receives an empty request or encounters another
444 443 # error.
445 444 #
446 445 # protov1-serving -> upgrade-initial
447 446 # An upgrade request line was seen.
448 447 #
449 448 # upgrade-initial -> upgrade-v2-filter-legacy-handshake
450 449 # Upgrade to version 2 in progress. Server is expecting to
451 450 # process a legacy handshake.
452 451 #
453 452 # upgrade-v2-filter-legacy-handshake -> shutdown
454 453 # Client did not fulfill upgrade handshake requirements.
455 454 #
456 455 # upgrade-v2-filter-legacy-handshake -> upgrade-v2-finish
457 456 # Client fulfilled version 2 upgrade requirements. Finishing that
458 457 # upgrade.
459 458 #
460 459 # upgrade-v2-finish -> protov2-serving
461 460 # Protocol upgrade to version 2 complete. Server can now speak protocol
462 461 # version 2.
463 462 #
464 463 # protov2-serving -> protov1-serving
465 464 # Ths happens by default since protocol version 2 is the same as
466 465 # version 1 except for the handshake.
467 466
468 467 state = 'protov1-serving'
469 468 proto = sshv1protocolhandler(ui, fin, fout)
470 469 protoswitched = False
471 470
472 471 while not ev.is_set():
473 472 if state == 'protov1-serving':
474 473 # Commands are issued on new lines.
475 474 request = fin.readline()[:-1]
476 475
477 476 # Empty lines signal to terminate the connection.
478 477 if not request:
479 478 state = 'shutdown'
480 479 continue
481 480
482 481 # It looks like a protocol upgrade request. Transition state to
483 482 # handle it.
484 483 if request.startswith(b'upgrade '):
485 484 if protoswitched:
486 485 _sshv1respondooberror(fout, ui.ferr,
487 486 b'cannot upgrade protocols multiple '
488 487 b'times')
489 488 state = 'shutdown'
490 489 continue
491 490
492 491 state = 'upgrade-initial'
493 492 continue
494 493
495 494 available = wireproto.commands.commandavailable(request, proto)
496 495
497 496 # This command isn't available. Send an empty response and go
498 497 # back to waiting for a new command.
499 498 if not available:
500 499 _sshv1respondbytes(fout, b'')
501 500 continue
502 501
503 502 rsp = wireproto.dispatch(repo, proto, request)
504 503
505 504 if isinstance(rsp, bytes):
506 505 _sshv1respondbytes(fout, rsp)
507 506 elif isinstance(rsp, wireprototypes.bytesresponse):
508 507 _sshv1respondbytes(fout, rsp.data)
509 508 elif isinstance(rsp, wireprototypes.streamres):
510 509 _sshv1respondstream(fout, rsp)
511 510 elif isinstance(rsp, wireprototypes.streamreslegacy):
512 511 _sshv1respondstream(fout, rsp)
513 512 elif isinstance(rsp, wireprototypes.pushres):
514 513 _sshv1respondbytes(fout, b'')
515 514 _sshv1respondbytes(fout, b'%d' % rsp.res)
516 515 elif isinstance(rsp, wireprototypes.pusherr):
517 516 _sshv1respondbytes(fout, rsp.res)
518 517 elif isinstance(rsp, wireprototypes.ooberror):
519 518 _sshv1respondooberror(fout, ui.ferr, rsp.message)
520 519 else:
521 520 raise error.ProgrammingError('unhandled response type from '
522 521 'wire protocol command: %s' % rsp)
523 522
524 523 # For now, protocol version 2 serving just goes back to version 1.
525 524 elif state == 'protov2-serving':
526 525 state = 'protov1-serving'
527 526 continue
528 527
529 528 elif state == 'upgrade-initial':
530 529 # We should never transition into this state if we've switched
531 530 # protocols.
532 531 assert not protoswitched
533 532 assert proto.name == wireprototypes.SSHV1
534 533
535 534 # Expected: upgrade <token> <capabilities>
536 535 # If we get something else, the request is malformed. It could be
537 536 # from a future client that has altered the upgrade line content.
538 537 # We treat this as an unknown command.
539 538 try:
540 539 token, caps = request.split(b' ')[1:]
541 540 except ValueError:
542 541 _sshv1respondbytes(fout, b'')
543 542 state = 'protov1-serving'
544 543 continue
545 544
546 545 # Send empty response if we don't support upgrading protocols.
547 546 if not ui.configbool('experimental', 'sshserver.support-v2'):
548 547 _sshv1respondbytes(fout, b'')
549 548 state = 'protov1-serving'
550 549 continue
551 550
552 551 try:
553 552 caps = urlreq.parseqs(caps)
554 553 except ValueError:
555 554 _sshv1respondbytes(fout, b'')
556 555 state = 'protov1-serving'
557 556 continue
558 557
559 558 # We don't see an upgrade request to protocol version 2. Ignore
560 559 # the upgrade request.
561 560 wantedprotos = caps.get(b'proto', [b''])[0]
562 561 if SSHV2 not in wantedprotos:
563 562 _sshv1respondbytes(fout, b'')
564 563 state = 'protov1-serving'
565 564 continue
566 565
567 566 # It looks like we can honor this upgrade request to protocol 2.
568 567 # Filter the rest of the handshake protocol request lines.
569 568 state = 'upgrade-v2-filter-legacy-handshake'
570 569 continue
571 570
572 571 elif state == 'upgrade-v2-filter-legacy-handshake':
573 572 # Client should have sent legacy handshake after an ``upgrade``
574 573 # request. Expected lines:
575 574 #
576 575 # hello
577 576 # between
578 577 # pairs 81
579 578 # 0000...-0000...
580 579
581 580 ok = True
582 581 for line in (b'hello', b'between', b'pairs 81'):
583 582 request = fin.readline()[:-1]
584 583
585 584 if request != line:
586 585 _sshv1respondooberror(fout, ui.ferr,
587 586 b'malformed handshake protocol: '
588 587 b'missing %s' % line)
589 588 ok = False
590 589 state = 'shutdown'
591 590 break
592 591
593 592 if not ok:
594 593 continue
595 594
596 595 request = fin.read(81)
597 596 if request != b'%s-%s' % (b'0' * 40, b'0' * 40):
598 597 _sshv1respondooberror(fout, ui.ferr,
599 598 b'malformed handshake protocol: '
600 599 b'missing between argument value')
601 600 state = 'shutdown'
602 601 continue
603 602
604 603 state = 'upgrade-v2-finish'
605 604 continue
606 605
607 606 elif state == 'upgrade-v2-finish':
608 607 # Send the upgrade response.
609 608 fout.write(b'upgraded %s %s\n' % (token, SSHV2))
610 609 servercaps = wireproto.capabilities(repo, proto)
611 610 rsp = b'capabilities: %s' % servercaps.data
612 611 fout.write(b'%d\n%s\n' % (len(rsp), rsp))
613 612 fout.flush()
614 613
615 614 proto = sshv2protocolhandler(ui, fin, fout)
616 615 protoswitched = True
617 616
618 617 state = 'protov2-serving'
619 618 continue
620 619
621 620 elif state == 'shutdown':
622 621 break
623 622
624 623 else:
625 624 raise error.ProgrammingError('unhandled ssh server state: %s' %
626 625 state)
627 626
628 627 class sshserver(object):
629 628 def __init__(self, ui, repo, logfh=None):
630 629 self._ui = ui
631 630 self._repo = repo
632 631 self._fin = ui.fin
633 632 self._fout = ui.fout
634 633
635 634 # Log write I/O to stdout and stderr if configured.
636 635 if logfh:
637 636 self._fout = util.makeloggingfileobject(
638 637 logfh, self._fout, 'o', logdata=True)
639 638 ui.ferr = util.makeloggingfileobject(
640 639 logfh, ui.ferr, 'e', logdata=True)
641 640
642 641 hook.redirect(True)
643 642 ui.fout = repo.ui.fout = ui.ferr
644 643
645 644 # Prevent insertion/deletion of CRs
646 645 util.setbinary(self._fin)
647 646 util.setbinary(self._fout)
648 647
649 648 def serve_forever(self):
650 649 self.serveuntil(threading.Event())
651 650 sys.exit(0)
652 651
653 652 def serveuntil(self, ev):
654 653 """Serve until a threading.Event is set."""
655 654 _runsshserver(self._ui, self._repo, self._fin, self._fout, ev)
General Comments 0
You need to be logged in to leave comments. Login now