##// END OF EJS Templates
hgweb: expose input stream on parsed WSGI request object...
Gregory Szorc -
r36873:da4e2f87 default
parent child Browse files
Show More
@@ -1,543 +1,544 b''
1 1 # hgweb/hgwebdir_mod.py - Web interface for a directory of repositories.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import os
12 12 import re
13 13 import time
14 14
15 15 from ..i18n import _
16 16
17 17 from .common import (
18 18 ErrorResponse,
19 19 HTTP_NOT_FOUND,
20 20 HTTP_OK,
21 21 HTTP_SERVER_ERROR,
22 22 cspvalues,
23 23 get_contact,
24 24 get_mtime,
25 25 ismember,
26 26 paritygen,
27 27 staticfile,
28 28 )
29 29
30 30 from .. import (
31 31 configitems,
32 32 encoding,
33 33 error,
34 34 hg,
35 35 profiling,
36 36 pycompat,
37 37 scmutil,
38 38 templater,
39 39 ui as uimod,
40 40 util,
41 41 )
42 42
43 43 from . import (
44 44 hgweb_mod,
45 45 request as requestmod,
46 46 webutil,
47 47 wsgicgi,
48 48 )
49 49 from ..utils import dateutil
50 50
51 51 def cleannames(items):
52 52 return [(util.pconvert(name).strip('/'), path) for name, path in items]
53 53
54 54 def findrepos(paths):
55 55 repos = []
56 56 for prefix, root in cleannames(paths):
57 57 roothead, roottail = os.path.split(root)
58 58 # "foo = /bar/*" or "foo = /bar/**" lets every repo /bar/N in or below
59 59 # /bar/ be served as as foo/N .
60 60 # '*' will not search inside dirs with .hg (except .hg/patches),
61 61 # '**' will search inside dirs with .hg (and thus also find subrepos).
62 62 try:
63 63 recurse = {'*': False, '**': True}[roottail]
64 64 except KeyError:
65 65 repos.append((prefix, root))
66 66 continue
67 67 roothead = os.path.normpath(os.path.abspath(roothead))
68 68 paths = scmutil.walkrepos(roothead, followsym=True, recurse=recurse)
69 69 repos.extend(urlrepos(prefix, roothead, paths))
70 70 return repos
71 71
72 72 def urlrepos(prefix, roothead, paths):
73 73 """yield url paths and filesystem paths from a list of repo paths
74 74
75 75 >>> conv = lambda seq: [(v, util.pconvert(p)) for v,p in seq]
76 76 >>> conv(urlrepos(b'hg', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
77 77 [('hg/r', '/opt/r'), ('hg/r/r', '/opt/r/r'), ('hg', '/opt')]
78 78 >>> conv(urlrepos(b'', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
79 79 [('r', '/opt/r'), ('r/r', '/opt/r/r'), ('', '/opt')]
80 80 """
81 81 for path in paths:
82 82 path = os.path.normpath(path)
83 83 yield (prefix + '/' +
84 84 util.pconvert(path[len(roothead):]).lstrip('/')).strip('/'), path
85 85
86 86 def geturlcgivars(baseurl, port):
87 87 """
88 88 Extract CGI variables from baseurl
89 89
90 90 >>> geturlcgivars(b"http://host.org/base", b"80")
91 91 ('host.org', '80', '/base')
92 92 >>> geturlcgivars(b"http://host.org:8000/base", b"80")
93 93 ('host.org', '8000', '/base')
94 94 >>> geturlcgivars(b'/base', 8000)
95 95 ('', '8000', '/base')
96 96 >>> geturlcgivars(b"base", b'8000')
97 97 ('', '8000', '/base')
98 98 >>> geturlcgivars(b"http://host", b'8000')
99 99 ('host', '8000', '/')
100 100 >>> geturlcgivars(b"http://host/", b'8000')
101 101 ('host', '8000', '/')
102 102 """
103 103 u = util.url(baseurl)
104 104 name = u.host or ''
105 105 if u.port:
106 106 port = u.port
107 107 path = u.path or ""
108 108 if not path.startswith('/'):
109 109 path = '/' + path
110 110
111 111 return name, pycompat.bytestr(port), path
112 112
113 113 class hgwebdir(object):
114 114 """HTTP server for multiple repositories.
115 115
116 116 Given a configuration, different repositories will be served depending
117 117 on the request path.
118 118
119 119 Instances are typically used as WSGI applications.
120 120 """
121 121 def __init__(self, conf, baseui=None):
122 122 self.conf = conf
123 123 self.baseui = baseui
124 124 self.ui = None
125 125 self.lastrefresh = 0
126 126 self.motd = None
127 127 self.refresh()
128 128
129 129 def refresh(self):
130 130 if self.ui:
131 131 refreshinterval = self.ui.configint('web', 'refreshinterval')
132 132 else:
133 133 item = configitems.coreitems['web']['refreshinterval']
134 134 refreshinterval = item.default
135 135
136 136 # refreshinterval <= 0 means to always refresh.
137 137 if (refreshinterval > 0 and
138 138 self.lastrefresh + refreshinterval > time.time()):
139 139 return
140 140
141 141 if self.baseui:
142 142 u = self.baseui.copy()
143 143 else:
144 144 u = uimod.ui.load()
145 145 u.setconfig('ui', 'report_untrusted', 'off', 'hgwebdir')
146 146 u.setconfig('ui', 'nontty', 'true', 'hgwebdir')
147 147 # displaying bundling progress bar while serving feels wrong and may
148 148 # break some wsgi implementations.
149 149 u.setconfig('progress', 'disable', 'true', 'hgweb')
150 150
151 151 if not isinstance(self.conf, (dict, list, tuple)):
152 152 map = {'paths': 'hgweb-paths'}
153 153 if not os.path.exists(self.conf):
154 154 raise error.Abort(_('config file %s not found!') % self.conf)
155 155 u.readconfig(self.conf, remap=map, trust=True)
156 156 paths = []
157 157 for name, ignored in u.configitems('hgweb-paths'):
158 158 for path in u.configlist('hgweb-paths', name):
159 159 paths.append((name, path))
160 160 elif isinstance(self.conf, (list, tuple)):
161 161 paths = self.conf
162 162 elif isinstance(self.conf, dict):
163 163 paths = self.conf.items()
164 164
165 165 repos = findrepos(paths)
166 166 for prefix, root in u.configitems('collections'):
167 167 prefix = util.pconvert(prefix)
168 168 for path in scmutil.walkrepos(root, followsym=True):
169 169 repo = os.path.normpath(path)
170 170 name = util.pconvert(repo)
171 171 if name.startswith(prefix):
172 172 name = name[len(prefix):]
173 173 repos.append((name.lstrip('/'), repo))
174 174
175 175 self.repos = repos
176 176 self.ui = u
177 177 encoding.encoding = self.ui.config('web', 'encoding')
178 178 self.style = self.ui.config('web', 'style')
179 179 self.templatepath = self.ui.config('web', 'templates', untrusted=False)
180 180 self.stripecount = self.ui.config('web', 'stripes')
181 181 if self.stripecount:
182 182 self.stripecount = int(self.stripecount)
183 183 self._baseurl = self.ui.config('web', 'baseurl')
184 184 prefix = self.ui.config('web', 'prefix')
185 185 if prefix.startswith('/'):
186 186 prefix = prefix[1:]
187 187 if prefix.endswith('/'):
188 188 prefix = prefix[:-1]
189 189 self.prefix = prefix
190 190 self.lastrefresh = time.time()
191 191
192 192 def run(self):
193 193 if not encoding.environ.get('GATEWAY_INTERFACE',
194 194 '').startswith("CGI/1."):
195 195 raise RuntimeError("This function is only intended to be "
196 196 "called while running as a CGI script.")
197 197 wsgicgi.launch(self)
198 198
199 199 def __call__(self, env, respond):
200 200 wsgireq = requestmod.wsgirequest(env, respond)
201 201 return self.run_wsgi(wsgireq)
202 202
203 203 def read_allowed(self, ui, wsgireq):
204 204 """Check allow_read and deny_read config options of a repo's ui object
205 205 to determine user permissions. By default, with neither option set (or
206 206 both empty), allow all users to read the repo. There are two ways a
207 207 user can be denied read access: (1) deny_read is not empty, and the
208 208 user is unauthenticated or deny_read contains user (or *), and (2)
209 209 allow_read is not empty and the user is not in allow_read. Return True
210 210 if user is allowed to read the repo, else return False."""
211 211
212 212 user = wsgireq.env.get('REMOTE_USER')
213 213
214 214 deny_read = ui.configlist('web', 'deny_read', untrusted=True)
215 215 if deny_read and (not user or ismember(ui, user, deny_read)):
216 216 return False
217 217
218 218 allow_read = ui.configlist('web', 'allow_read', untrusted=True)
219 219 # by default, allow reading if no allow_read option has been set
220 220 if (not allow_read) or ismember(ui, user, allow_read):
221 221 return True
222 222
223 223 return False
224 224
225 225 def run_wsgi(self, wsgireq):
226 226 profile = self.ui.configbool('profiling', 'enabled')
227 227 with profiling.profile(self.ui, enabled=profile):
228 228 for r in self._runwsgi(wsgireq):
229 229 yield r
230 230
231 231 def _runwsgi(self, wsgireq):
232 232 try:
233 233 self.refresh()
234 234
235 235 csp, nonce = cspvalues(self.ui)
236 236 if csp:
237 237 wsgireq.headers.append(('Content-Security-Policy', csp))
238 238
239 239 virtual = wsgireq.env.get("PATH_INFO", "").strip('/')
240 240 tmpl = self.templater(wsgireq, nonce)
241 241 ctype = tmpl('mimetype', encoding=encoding.encoding)
242 242 ctype = templater.stringify(ctype)
243 243
244 244 # a static file
245 245 if virtual.startswith('static/') or 'static' in wsgireq.form:
246 246 if virtual.startswith('static/'):
247 247 fname = virtual[7:]
248 248 else:
249 249 fname = wsgireq.form['static'][0]
250 250 static = self.ui.config("web", "static", None,
251 251 untrusted=False)
252 252 if not static:
253 253 tp = self.templatepath or templater.templatepaths()
254 254 if isinstance(tp, str):
255 255 tp = [tp]
256 256 static = [os.path.join(p, 'static') for p in tp]
257 257 staticfile(static, fname, wsgireq)
258 258 return []
259 259
260 260 # top-level index
261 261
262 262 repos = dict(self.repos)
263 263
264 264 if (not virtual or virtual == 'index') and virtual not in repos:
265 265 wsgireq.respond(HTTP_OK, ctype)
266 266 return self.makeindex(wsgireq, tmpl)
267 267
268 268 # nested indexes and hgwebs
269 269
270 270 if virtual.endswith('/index') and virtual not in repos:
271 271 subdir = virtual[:-len('index')]
272 272 if any(r.startswith(subdir) for r in repos):
273 273 wsgireq.respond(HTTP_OK, ctype)
274 274 return self.makeindex(wsgireq, tmpl, subdir)
275 275
276 276 def _virtualdirs():
277 277 # Check the full virtual path, each parent, and the root ('')
278 278 if virtual != '':
279 279 yield virtual
280 280
281 281 for p in util.finddirs(virtual):
282 282 yield p
283 283
284 284 yield ''
285 285
286 286 for virtualrepo in _virtualdirs():
287 287 real = repos.get(virtualrepo)
288 288 if real:
289 289 wsgireq.env['REPO_NAME'] = virtualrepo
290 290 # We have to re-parse because of updated environment
291 291 # variable.
292 292 # TODO this is kind of hacky and we should have a better
293 293 # way of doing this than with REPO_NAME side-effects.
294 wsgireq.req = requestmod.parserequestfromenv(wsgireq.env)
294 wsgireq.req = requestmod.parserequestfromenv(
295 wsgireq.env, wsgireq.req.bodyfh)
295 296 try:
296 297 # ensure caller gets private copy of ui
297 298 repo = hg.repository(self.ui.copy(), real)
298 299 return hgweb_mod.hgweb(repo).run_wsgi(wsgireq)
299 300 except IOError as inst:
300 301 msg = encoding.strtolocal(inst.strerror)
301 302 raise ErrorResponse(HTTP_SERVER_ERROR, msg)
302 303 except error.RepoError as inst:
303 304 raise ErrorResponse(HTTP_SERVER_ERROR, bytes(inst))
304 305
305 306 # browse subdirectories
306 307 subdir = virtual + '/'
307 308 if [r for r in repos if r.startswith(subdir)]:
308 309 wsgireq.respond(HTTP_OK, ctype)
309 310 return self.makeindex(wsgireq, tmpl, subdir)
310 311
311 312 # prefixes not found
312 313 wsgireq.respond(HTTP_NOT_FOUND, ctype)
313 314 return tmpl("notfound", repo=virtual)
314 315
315 316 except ErrorResponse as err:
316 317 wsgireq.respond(err, ctype)
317 318 return tmpl('error', error=err.message or '')
318 319 finally:
319 320 tmpl = None
320 321
321 322 def makeindex(self, wsgireq, tmpl, subdir=""):
322 323
323 324 def archivelist(ui, nodeid, url):
324 325 allowed = ui.configlist("web", "allow_archive", untrusted=True)
325 326 archives = []
326 327 for typ, spec in hgweb_mod.archivespecs.iteritems():
327 328 if typ in allowed or ui.configbool("web", "allow" + typ,
328 329 untrusted=True):
329 330 archives.append({"type": typ, "extension": spec[2],
330 331 "node": nodeid, "url": url})
331 332 return archives
332 333
333 334 def rawentries(subdir="", **map):
334 335
335 336 descend = self.ui.configbool('web', 'descend')
336 337 collapse = self.ui.configbool('web', 'collapse')
337 338 seenrepos = set()
338 339 seendirs = set()
339 340 for name, path in self.repos:
340 341
341 342 if not name.startswith(subdir):
342 343 continue
343 344 name = name[len(subdir):]
344 345 directory = False
345 346
346 347 if '/' in name:
347 348 if not descend:
348 349 continue
349 350
350 351 nameparts = name.split('/')
351 352 rootname = nameparts[0]
352 353
353 354 if not collapse:
354 355 pass
355 356 elif rootname in seendirs:
356 357 continue
357 358 elif rootname in seenrepos:
358 359 pass
359 360 else:
360 361 directory = True
361 362 name = rootname
362 363
363 364 # redefine the path to refer to the directory
364 365 discarded = '/'.join(nameparts[1:])
365 366
366 367 # remove name parts plus accompanying slash
367 368 path = path[:-len(discarded) - 1]
368 369
369 370 try:
370 371 r = hg.repository(self.ui, path)
371 372 directory = False
372 373 except (IOError, error.RepoError):
373 374 pass
374 375
375 376 parts = [name]
376 377 parts.insert(0, '/' + subdir.rstrip('/'))
377 378 if wsgireq.env['SCRIPT_NAME']:
378 379 parts.insert(0, wsgireq.env['SCRIPT_NAME'])
379 380 url = re.sub(r'/+', '/', '/'.join(parts) + '/')
380 381
381 382 # show either a directory entry or a repository
382 383 if directory:
383 384 # get the directory's time information
384 385 try:
385 386 d = (get_mtime(path), dateutil.makedate()[1])
386 387 except OSError:
387 388 continue
388 389
389 390 # add '/' to the name to make it obvious that
390 391 # the entry is a directory, not a regular repository
391 392 row = {'contact': "",
392 393 'contact_sort': "",
393 394 'name': name + '/',
394 395 'name_sort': name,
395 396 'url': url,
396 397 'description': "",
397 398 'description_sort': "",
398 399 'lastchange': d,
399 400 'lastchange_sort': d[1]-d[0],
400 401 'archives': [],
401 402 'isdirectory': True,
402 403 'labels': [],
403 404 }
404 405
405 406 seendirs.add(name)
406 407 yield row
407 408 continue
408 409
409 410 u = self.ui.copy()
410 411 try:
411 412 u.readconfig(os.path.join(path, '.hg', 'hgrc'))
412 413 except Exception as e:
413 414 u.warn(_('error reading %s/.hg/hgrc: %s\n') % (path, e))
414 415 continue
415 416 def get(section, name, default=uimod._unset):
416 417 return u.config(section, name, default, untrusted=True)
417 418
418 419 if u.configbool("web", "hidden", untrusted=True):
419 420 continue
420 421
421 422 if not self.read_allowed(u, wsgireq):
422 423 continue
423 424
424 425 # update time with local timezone
425 426 try:
426 427 r = hg.repository(self.ui, path)
427 428 except IOError:
428 429 u.warn(_('error accessing repository at %s\n') % path)
429 430 continue
430 431 except error.RepoError:
431 432 u.warn(_('error accessing repository at %s\n') % path)
432 433 continue
433 434 try:
434 435 d = (get_mtime(r.spath), dateutil.makedate()[1])
435 436 except OSError:
436 437 continue
437 438
438 439 contact = get_contact(get)
439 440 description = get("web", "description")
440 441 seenrepos.add(name)
441 442 name = get("web", "name", name)
442 443 row = {'contact': contact or "unknown",
443 444 'contact_sort': contact.upper() or "unknown",
444 445 'name': name,
445 446 'name_sort': name,
446 447 'url': url,
447 448 'description': description or "unknown",
448 449 'description_sort': description.upper() or "unknown",
449 450 'lastchange': d,
450 451 'lastchange_sort': d[1]-d[0],
451 452 'archives': archivelist(u, "tip", url),
452 453 'isdirectory': None,
453 454 'labels': u.configlist('web', 'labels', untrusted=True),
454 455 }
455 456
456 457 yield row
457 458
458 459 sortdefault = None, False
459 460 def entries(sortcolumn="", descending=False, subdir="", **map):
460 461 rows = rawentries(subdir=subdir, **map)
461 462
462 463 if sortcolumn and sortdefault != (sortcolumn, descending):
463 464 sortkey = '%s_sort' % sortcolumn
464 465 rows = sorted(rows, key=lambda x: x[sortkey],
465 466 reverse=descending)
466 467 for row, parity in zip(rows, paritygen(self.stripecount)):
467 468 row['parity'] = parity
468 469 yield row
469 470
470 471 self.refresh()
471 472 sortable = ["name", "description", "contact", "lastchange"]
472 473 sortcolumn, descending = sortdefault
473 474 if 'sort' in wsgireq.form:
474 475 sortcolumn = wsgireq.form['sort'][0]
475 476 descending = sortcolumn.startswith('-')
476 477 if descending:
477 478 sortcolumn = sortcolumn[1:]
478 479 if sortcolumn not in sortable:
479 480 sortcolumn = ""
480 481
481 482 sort = [("sort_%s" % column,
482 483 "%s%s" % ((not descending and column == sortcolumn)
483 484 and "-" or "", column))
484 485 for column in sortable]
485 486
486 487 self.refresh()
487 488 self.updatereqenv(wsgireq.env)
488 489
489 490 return tmpl("index", entries=entries, subdir=subdir,
490 491 pathdef=hgweb_mod.makebreadcrumb('/' + subdir, self.prefix),
491 492 sortcolumn=sortcolumn, descending=descending,
492 493 **dict(sort))
493 494
494 495 def templater(self, wsgireq, nonce):
495 496
496 497 def motd(**map):
497 498 if self.motd is not None:
498 499 yield self.motd
499 500 else:
500 501 yield config('web', 'motd')
501 502
502 503 def config(section, name, default=uimod._unset, untrusted=True):
503 504 return self.ui.config(section, name, default, untrusted)
504 505
505 506 self.updatereqenv(wsgireq.env)
506 507
507 508 url = wsgireq.env.get('SCRIPT_NAME', '')
508 509 if not url.endswith('/'):
509 510 url += '/'
510 511
511 512 vars = {}
512 513 styles, (style, mapfile) = hgweb_mod.getstyle(wsgireq, config,
513 514 self.templatepath)
514 515 if style == styles[0]:
515 516 vars['style'] = style
516 517
517 518 sessionvars = webutil.sessionvars(vars, r'?')
518 519 logourl = config('web', 'logourl')
519 520 logoimg = config('web', 'logoimg')
520 521 staticurl = config('web', 'staticurl') or url + 'static/'
521 522 if not staticurl.endswith('/'):
522 523 staticurl += '/'
523 524
524 525 defaults = {
525 526 "encoding": encoding.encoding,
526 527 "motd": motd,
527 528 "url": url,
528 529 "logourl": logourl,
529 530 "logoimg": logoimg,
530 531 "staticurl": staticurl,
531 532 "sessionvars": sessionvars,
532 533 "style": style,
533 534 "nonce": nonce,
534 535 }
535 536 tmpl = templater.templater.frommapfile(mapfile, defaults=defaults)
536 537 return tmpl
537 538
538 539 def updatereqenv(self, env):
539 540 if self._baseurl is not None:
540 541 name, port, path = geturlcgivars(self._baseurl, env['SERVER_PORT'])
541 542 env['SERVER_NAME'] = name
542 543 env['SERVER_PORT'] = port
543 544 env['SCRIPT_NAME'] = path
@@ -1,363 +1,374 b''
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import cgi
12 12 import errno
13 13 import socket
14 14 import wsgiref.headers as wsgiheaders
15 15 #import wsgiref.validate
16 16
17 17 from .common import (
18 18 ErrorResponse,
19 19 HTTP_NOT_MODIFIED,
20 20 statusmessage,
21 21 )
22 22
23 23 from ..thirdparty import (
24 24 attr,
25 25 )
26 26 from .. import (
27 27 pycompat,
28 28 util,
29 29 )
30 30
31 31 shortcuts = {
32 32 'cl': [('cmd', ['changelog']), ('rev', None)],
33 33 'sl': [('cmd', ['shortlog']), ('rev', None)],
34 34 'cs': [('cmd', ['changeset']), ('node', None)],
35 35 'f': [('cmd', ['file']), ('filenode', None)],
36 36 'fl': [('cmd', ['filelog']), ('filenode', None)],
37 37 'fd': [('cmd', ['filediff']), ('node', None)],
38 38 'fa': [('cmd', ['annotate']), ('filenode', None)],
39 39 'mf': [('cmd', ['manifest']), ('manifest', None)],
40 40 'ca': [('cmd', ['archive']), ('node', None)],
41 41 'tags': [('cmd', ['tags'])],
42 42 'tip': [('cmd', ['changeset']), ('node', ['tip'])],
43 43 'static': [('cmd', ['static']), ('file', None)]
44 44 }
45 45
46 46 def normalize(form):
47 47 # first expand the shortcuts
48 48 for k in shortcuts:
49 49 if k in form:
50 50 for name, value in shortcuts[k]:
51 51 if value is None:
52 52 value = form[k]
53 53 form[name] = value
54 54 del form[k]
55 55 # And strip the values
56 56 bytesform = {}
57 57 for k, v in form.iteritems():
58 58 bytesform[pycompat.bytesurl(k)] = [
59 59 pycompat.bytesurl(i.strip()) for i in v]
60 60 return bytesform
61 61
62 62 @attr.s(frozen=True)
63 63 class parsedrequest(object):
64 """Represents a parsed WSGI request / static HTTP request parameters."""
64 """Represents a parsed WSGI request.
65
66 Contains both parsed parameters as well as a handle on the input stream.
67 """
65 68
66 69 # Request method.
67 70 method = attr.ib()
68 71 # Full URL for this request.
69 72 url = attr.ib()
70 73 # URL without any path components. Just <proto>://<host><port>.
71 74 baseurl = attr.ib()
72 75 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
73 76 # of HTTP: Host header for hostname. This is likely what clients used.
74 77 advertisedurl = attr.ib()
75 78 advertisedbaseurl = attr.ib()
76 79 # WSGI application path.
77 80 apppath = attr.ib()
78 81 # List of path parts to be used for dispatch.
79 82 dispatchparts = attr.ib()
80 83 # URL path component (no query string) used for dispatch.
81 84 dispatchpath = attr.ib()
82 85 # Whether there is a path component to this request. This can be true
83 86 # when ``dispatchpath`` is empty due to REPO_NAME muckery.
84 87 havepathinfo = attr.ib()
85 88 # Raw query string (part after "?" in URL).
86 89 querystring = attr.ib()
87 90 # List of 2-tuples of query string arguments.
88 91 querystringlist = attr.ib()
89 92 # Dict of query string arguments. Values are lists with at least 1 item.
90 93 querystringdict = attr.ib()
91 94 # wsgiref.headers.Headers instance. Operates like a dict with case
92 95 # insensitive keys.
93 96 headers = attr.ib()
97 # Request body input stream.
98 bodyfh = attr.ib()
94 99
95 def parserequestfromenv(env):
100 def parserequestfromenv(env, bodyfh):
96 101 """Parse URL components from environment variables.
97 102
98 103 WSGI defines request attributes via environment variables. This function
99 104 parses the environment variables into a data structure.
100 105 """
101 106 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
102 107
103 108 # We first validate that the incoming object conforms with the WSGI spec.
104 109 # We only want to be dealing with spec-conforming WSGI implementations.
105 110 # TODO enable this once we fix internal violations.
106 111 #wsgiref.validate.check_environ(env)
107 112
108 113 # PEP-0333 states that environment keys and values are native strings
109 114 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
110 115 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
111 116 # in Mercurial, so mass convert string keys and values to bytes.
112 117 if pycompat.ispy3:
113 118 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
114 119 env = {k: v.encode('latin-1') if isinstance(v, str) else v
115 120 for k, v in env.iteritems()}
116 121
117 122 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
118 123 # the environment variables.
119 124 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
120 125 # how URLs are reconstructed.
121 126 fullurl = env['wsgi.url_scheme'] + '://'
122 127 advertisedfullurl = fullurl
123 128
124 129 def addport(s):
125 130 if env['wsgi.url_scheme'] == 'https':
126 131 if env['SERVER_PORT'] != '443':
127 132 s += ':' + env['SERVER_PORT']
128 133 else:
129 134 if env['SERVER_PORT'] != '80':
130 135 s += ':' + env['SERVER_PORT']
131 136
132 137 return s
133 138
134 139 if env.get('HTTP_HOST'):
135 140 fullurl += env['HTTP_HOST']
136 141 else:
137 142 fullurl += env['SERVER_NAME']
138 143 fullurl = addport(fullurl)
139 144
140 145 advertisedfullurl += env['SERVER_NAME']
141 146 advertisedfullurl = addport(advertisedfullurl)
142 147
143 148 baseurl = fullurl
144 149 advertisedbaseurl = advertisedfullurl
145 150
146 151 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
147 152 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
148 153 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
149 154 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
150 155
151 156 if env.get('QUERY_STRING'):
152 157 fullurl += '?' + env['QUERY_STRING']
153 158 advertisedfullurl += '?' + env['QUERY_STRING']
154 159
155 160 # When dispatching requests, we look at the URL components (PATH_INFO
156 161 # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
157 162 # has the concept of "virtual" repositories. This is defined via REPO_NAME.
158 163 # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
159 164 # root. We also exclude its path components from PATH_INFO when resolving
160 165 # the dispatch path.
161 166
162 167 apppath = env['SCRIPT_NAME']
163 168
164 169 if env.get('REPO_NAME'):
165 170 if not apppath.endswith('/'):
166 171 apppath += '/'
167 172
168 173 apppath += env.get('REPO_NAME')
169 174
170 175 if 'PATH_INFO' in env:
171 176 dispatchparts = env['PATH_INFO'].strip('/').split('/')
172 177
173 178 # Strip out repo parts.
174 179 repoparts = env.get('REPO_NAME', '').split('/')
175 180 if dispatchparts[:len(repoparts)] == repoparts:
176 181 dispatchparts = dispatchparts[len(repoparts):]
177 182 else:
178 183 dispatchparts = []
179 184
180 185 dispatchpath = '/'.join(dispatchparts)
181 186
182 187 querystring = env.get('QUERY_STRING', '')
183 188
184 189 # We store as a list so we have ordering information. We also store as
185 190 # a dict to facilitate fast lookup.
186 191 querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)
187 192
188 193 querystringdict = {}
189 194 for k, v in querystringlist:
190 195 if k in querystringdict:
191 196 querystringdict[k].append(v)
192 197 else:
193 198 querystringdict[k] = [v]
194 199
195 200 # HTTP_* keys contain HTTP request headers. The Headers structure should
196 201 # perform case normalization for us. We just rewrite underscore to dash
197 202 # so keys match what likely went over the wire.
198 203 headers = []
199 204 for k, v in env.iteritems():
200 205 if k.startswith('HTTP_'):
201 206 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
202 207
203 208 headers = wsgiheaders.Headers(headers)
204 209
205 210 # This is kind of a lie because the HTTP header wasn't explicitly
206 211 # sent. But for all intents and purposes it should be OK to lie about
207 212 # this, since a consumer will either either value to determine how many
208 213 # bytes are available to read.
209 214 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
210 215 headers['Content-Length'] = env['CONTENT_LENGTH']
211 216
217 # TODO do this once we remove wsgirequest.inp, otherwise we could have
218 # multiple readers from the underlying input stream.
219 #bodyfh = env['wsgi.input']
220 #if 'Content-Length' in headers:
221 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
222
212 223 return parsedrequest(method=env['REQUEST_METHOD'],
213 224 url=fullurl, baseurl=baseurl,
214 225 advertisedurl=advertisedfullurl,
215 226 advertisedbaseurl=advertisedbaseurl,
216 227 apppath=apppath,
217 228 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
218 229 havepathinfo='PATH_INFO' in env,
219 230 querystring=querystring,
220 231 querystringlist=querystringlist,
221 232 querystringdict=querystringdict,
222 headers=headers)
233 headers=headers,
234 bodyfh=bodyfh)
223 235
224 236 class wsgirequest(object):
225 237 """Higher-level API for a WSGI request.
226 238
227 239 WSGI applications are invoked with 2 arguments. They are used to
228 240 instantiate instances of this class, which provides higher-level APIs
229 241 for obtaining request parameters, writing HTTP output, etc.
230 242 """
231 243 def __init__(self, wsgienv, start_response):
232 244 version = wsgienv[r'wsgi.version']
233 245 if (version < (1, 0)) or (version >= (2, 0)):
234 246 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
235 247 % version)
236 self.inp = wsgienv[r'wsgi.input']
248
249 inp = wsgienv[r'wsgi.input']
237 250
238 251 if r'HTTP_CONTENT_LENGTH' in wsgienv:
239 self.inp = util.cappedreader(self.inp,
240 int(wsgienv[r'HTTP_CONTENT_LENGTH']))
252 inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
241 253 elif r'CONTENT_LENGTH' in wsgienv:
242 self.inp = util.cappedreader(self.inp,
243 int(wsgienv[r'CONTENT_LENGTH']))
254 inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
244 255
245 256 self.err = wsgienv[r'wsgi.errors']
246 257 self.threaded = wsgienv[r'wsgi.multithread']
247 258 self.multiprocess = wsgienv[r'wsgi.multiprocess']
248 259 self.run_once = wsgienv[r'wsgi.run_once']
249 260 self.env = wsgienv
250 self.form = normalize(cgi.parse(self.inp,
261 self.form = normalize(cgi.parse(inp,
251 262 self.env,
252 263 keep_blank_values=1))
253 264 self._start_response = start_response
254 265 self.server_write = None
255 266 self.headers = []
256 267
257 self.req = parserequestfromenv(wsgienv)
268 self.req = parserequestfromenv(wsgienv, inp)
258 269
259 270 def respond(self, status, type, filename=None, body=None):
260 271 if not isinstance(type, str):
261 272 type = pycompat.sysstr(type)
262 273 if self._start_response is not None:
263 274 self.headers.append((r'Content-Type', type))
264 275 if filename:
265 276 filename = (filename.rpartition('/')[-1]
266 277 .replace('\\', '\\\\').replace('"', '\\"'))
267 278 self.headers.append(('Content-Disposition',
268 279 'inline; filename="%s"' % filename))
269 280 if body is not None:
270 281 self.headers.append((r'Content-Length', str(len(body))))
271 282
272 283 for k, v in self.headers:
273 284 if not isinstance(v, str):
274 285 raise TypeError('header value must be string: %r' % (v,))
275 286
276 287 if isinstance(status, ErrorResponse):
277 288 self.headers.extend(status.headers)
278 289 if status.code == HTTP_NOT_MODIFIED:
279 290 # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
280 291 # it MUST NOT include any headers other than these and no
281 292 # body
282 293 self.headers = [(k, v) for (k, v) in self.headers if
283 294 k in ('Date', 'ETag', 'Expires',
284 295 'Cache-Control', 'Vary')]
285 296 status = statusmessage(status.code, pycompat.bytestr(status))
286 297 elif status == 200:
287 298 status = '200 Script output follows'
288 299 elif isinstance(status, int):
289 300 status = statusmessage(status)
290 301
291 302 # Various HTTP clients (notably httplib) won't read the HTTP
292 303 # response until the HTTP request has been sent in full. If servers
293 304 # (us) send a response before the HTTP request has been fully sent,
294 305 # the connection may deadlock because neither end is reading.
295 306 #
296 307 # We work around this by "draining" the request data before
297 308 # sending any response in some conditions.
298 309 drain = False
299 310 close = False
300 311
301 312 # If the client sent Expect: 100-continue, we assume it is smart
302 313 # enough to deal with the server sending a response before reading
303 314 # the request. (httplib doesn't do this.)
304 315 if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
305 316 pass
306 317 # Only tend to request methods that have bodies. Strictly speaking,
307 318 # we should sniff for a body. But this is fine for our existing
308 319 # WSGI applications.
309 320 elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
310 321 pass
311 322 else:
312 323 # If we don't know how much data to read, there's no guarantee
313 324 # that we can drain the request responsibly. The WSGI
314 325 # specification only says that servers *should* ensure the
315 326 # input stream doesn't overrun the actual request. So there's
316 327 # no guarantee that reading until EOF won't corrupt the stream
317 328 # state.
318 if not isinstance(self.inp, util.cappedreader):
329 if not isinstance(self.req.bodyfh, util.cappedreader):
319 330 close = True
320 331 else:
321 332 # We /could/ only drain certain HTTP response codes. But 200
322 333 # and non-200 wire protocol responses both require draining.
323 334 # Since we have a capped reader in place for all situations
324 335 # where we drain, it is safe to read from that stream. We'll
325 336 # either do a drain or no-op if we're already at EOF.
326 337 drain = True
327 338
328 339 if close:
329 340 self.headers.append((r'Connection', r'Close'))
330 341
331 342 if drain:
332 assert isinstance(self.inp, util.cappedreader)
343 assert isinstance(self.req.bodyfh, util.cappedreader)
333 344 while True:
334 chunk = self.inp.read(32768)
345 chunk = self.req.bodyfh.read(32768)
335 346 if not chunk:
336 347 break
337 348
338 349 self.server_write = self._start_response(
339 350 pycompat.sysstr(status), self.headers)
340 351 self._start_response = None
341 352 self.headers = []
342 353 if body is not None:
343 354 self.write(body)
344 355 self.server_write = None
345 356
346 357 def write(self, thing):
347 358 if thing:
348 359 try:
349 360 self.server_write(thing)
350 361 except socket.error as inst:
351 362 if inst[0] != errno.ECONNRESET:
352 363 raise
353 364
354 365 def flush(self):
355 366 return None
356 367
357 368 def wsgiapplication(app_maker):
358 369 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
359 370 can and should now be used as a WSGI application.'''
360 371 application = app_maker()
361 372 def run_wsgi(env, respond):
362 373 return application(env, respond)
363 374 return run_wsgi
@@ -1,649 +1,649 b''
1 1 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
2 2 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 3 #
4 4 # This software may be used and distributed according to the terms of the
5 5 # GNU General Public License version 2 or any later version.
6 6
7 7 from __future__ import absolute_import
8 8
9 9 import contextlib
10 10 import struct
11 11 import sys
12 12 import threading
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 encoding,
17 17 error,
18 18 hook,
19 19 pycompat,
20 20 util,
21 21 wireproto,
22 22 wireprototypes,
23 23 )
24 24
25 25 stringio = util.stringio
26 26
27 27 urlerr = util.urlerr
28 28 urlreq = util.urlreq
29 29
30 30 HTTP_OK = 200
31 31
32 32 HGTYPE = 'application/mercurial-0.1'
33 33 HGTYPE2 = 'application/mercurial-0.2'
34 34 HGERRTYPE = 'application/hg-error'
35 35
36 36 SSHV1 = wireprototypes.SSHV1
37 37 SSHV2 = wireprototypes.SSHV2
38 38
39 39 def decodevaluefromheaders(req, headerprefix):
40 40 """Decode a long value from multiple HTTP request headers.
41 41
42 42 Returns the value as a bytes, not a str.
43 43 """
44 44 chunks = []
45 45 i = 1
46 46 while True:
47 47 v = req.headers.get(b'%s-%d' % (headerprefix, i))
48 48 if v is None:
49 49 break
50 50 chunks.append(pycompat.bytesurl(v))
51 51 i += 1
52 52
53 53 return ''.join(chunks)
54 54
55 55 class httpv1protocolhandler(wireprototypes.baseprotocolhandler):
56 56 def __init__(self, wsgireq, req, ui, checkperm):
57 57 self._wsgireq = wsgireq
58 58 self._req = req
59 59 self._ui = ui
60 60 self._checkperm = checkperm
61 61
62 62 @property
63 63 def name(self):
64 64 return 'http-v1'
65 65
66 66 def getargs(self, args):
67 67 knownargs = self._args()
68 68 data = {}
69 69 keys = args.split()
70 70 for k in keys:
71 71 if k == '*':
72 72 star = {}
73 73 for key in knownargs.keys():
74 74 if key != 'cmd' and key not in keys:
75 75 star[key] = knownargs[key][0]
76 76 data['*'] = star
77 77 else:
78 78 data[k] = knownargs[k][0]
79 79 return [data[k] for k in keys]
80 80
81 81 def _args(self):
82 82 args = util.rapply(pycompat.bytesurl, self._wsgireq.form.copy())
83 83 postlen = int(self._req.headers.get(b'X-HgArgs-Post', 0))
84 84 if postlen:
85 85 args.update(urlreq.parseqs(
86 self._wsgireq.inp.read(postlen), keep_blank_values=True))
86 self._req.bodyfh.read(postlen), keep_blank_values=True))
87 87 return args
88 88
89 89 argvalue = decodevaluefromheaders(self._req, b'X-HgArg')
90 90 args.update(urlreq.parseqs(argvalue, keep_blank_values=True))
91 91 return args
92 92
93 93 def forwardpayload(self, fp):
94 94 # Existing clients *always* send Content-Length.
95 95 length = int(self._req.headers[b'Content-Length'])
96 96
97 97 # If httppostargs is used, we need to read Content-Length
98 98 # minus the amount that was consumed by args.
99 99 length -= int(self._req.headers.get(b'X-HgArgs-Post', 0))
100 for s in util.filechunkiter(self._wsgireq.inp, limit=length):
100 for s in util.filechunkiter(self._req.bodyfh, limit=length):
101 101 fp.write(s)
102 102
103 103 @contextlib.contextmanager
104 104 def mayberedirectstdio(self):
105 105 oldout = self._ui.fout
106 106 olderr = self._ui.ferr
107 107
108 108 out = util.stringio()
109 109
110 110 try:
111 111 self._ui.fout = out
112 112 self._ui.ferr = out
113 113 yield out
114 114 finally:
115 115 self._ui.fout = oldout
116 116 self._ui.ferr = olderr
117 117
118 118 def client(self):
119 119 return 'remote:%s:%s:%s' % (
120 120 self._wsgireq.env.get('wsgi.url_scheme') or 'http',
121 121 urlreq.quote(self._wsgireq.env.get('REMOTE_HOST', '')),
122 122 urlreq.quote(self._wsgireq.env.get('REMOTE_USER', '')))
123 123
124 124 def addcapabilities(self, repo, caps):
125 125 caps.append('httpheader=%d' %
126 126 repo.ui.configint('server', 'maxhttpheaderlen'))
127 127 if repo.ui.configbool('experimental', 'httppostargs'):
128 128 caps.append('httppostargs')
129 129
130 130 # FUTURE advertise 0.2rx once support is implemented
131 131 # FUTURE advertise minrx and mintx after consulting config option
132 132 caps.append('httpmediatype=0.1rx,0.1tx,0.2tx')
133 133
134 134 compengines = wireproto.supportedcompengines(repo.ui, util.SERVERROLE)
135 135 if compengines:
136 136 comptypes = ','.join(urlreq.quote(e.wireprotosupport().name)
137 137 for e in compengines)
138 138 caps.append('compression=%s' % comptypes)
139 139
140 140 return caps
141 141
142 142 def checkperm(self, perm):
143 143 return self._checkperm(perm)
144 144
145 145 # This method exists mostly so that extensions like remotefilelog can
146 146 # disable a kludgey legacy method only over http. As of early 2018,
147 147 # there are no other known users, so with any luck we can discard this
148 148 # hook if remotefilelog becomes a first-party extension.
149 149 def iscmd(cmd):
150 150 return cmd in wireproto.commands
151 151
152 152 def handlewsgirequest(rctx, wsgireq, req, checkperm):
153 153 """Possibly process a wire protocol request.
154 154
155 155 If the current request is a wire protocol request, the request is
156 156 processed by this function.
157 157
158 158 ``wsgireq`` is a ``wsgirequest`` instance.
159 159 ``req`` is a ``parsedrequest`` instance.
160 160
161 161 Returns a 2-tuple of (bool, response) where the 1st element indicates
162 162 whether the request was handled and the 2nd element is a return
163 163 value for a WSGI application (often a generator of bytes).
164 164 """
165 165 # Avoid cycle involving hg module.
166 166 from .hgweb import common as hgwebcommon
167 167
168 168 repo = rctx.repo
169 169
170 170 # HTTP version 1 wire protocol requests are denoted by a "cmd" query
171 171 # string parameter. If it isn't present, this isn't a wire protocol
172 172 # request.
173 173 if 'cmd' not in req.querystringdict:
174 174 return False, None
175 175
176 176 cmd = req.querystringdict['cmd'][0]
177 177
178 178 # The "cmd" request parameter is used by both the wire protocol and hgweb.
179 179 # While not all wire protocol commands are available for all transports,
180 180 # if we see a "cmd" value that resembles a known wire protocol command, we
181 181 # route it to a protocol handler. This is better than routing possible
182 182 # wire protocol requests to hgweb because it prevents hgweb from using
183 183 # known wire protocol commands and it is less confusing for machine
184 184 # clients.
185 185 if not iscmd(cmd):
186 186 return False, None
187 187
188 188 # The "cmd" query string argument is only valid on the root path of the
189 189 # repo. e.g. ``/?cmd=foo``, ``/repo?cmd=foo``. URL paths within the repo
190 190 # like ``/blah?cmd=foo`` are not allowed. So don't recognize the request
191 191 # in this case. We send an HTTP 404 for backwards compatibility reasons.
192 192 if req.dispatchpath:
193 193 res = _handlehttperror(
194 194 hgwebcommon.ErrorResponse(hgwebcommon.HTTP_NOT_FOUND), wsgireq,
195 195 req)
196 196
197 197 return True, res
198 198
199 199 proto = httpv1protocolhandler(wsgireq, req, repo.ui,
200 200 lambda perm: checkperm(rctx, wsgireq, perm))
201 201
202 202 # The permissions checker should be the only thing that can raise an
203 203 # ErrorResponse. It is kind of a layer violation to catch an hgweb
204 204 # exception here. So consider refactoring into a exception type that
205 205 # is associated with the wire protocol.
206 206 try:
207 207 res = _callhttp(repo, wsgireq, req, proto, cmd)
208 208 except hgwebcommon.ErrorResponse as e:
209 209 res = _handlehttperror(e, wsgireq, req)
210 210
211 211 return True, res
212 212
213 213 def _httpresponsetype(ui, req, prefer_uncompressed):
214 214 """Determine the appropriate response type and compression settings.
215 215
216 216 Returns a tuple of (mediatype, compengine, engineopts).
217 217 """
218 218 # Determine the response media type and compression engine based
219 219 # on the request parameters.
220 220 protocaps = decodevaluefromheaders(req, 'X-HgProto').split(' ')
221 221
222 222 if '0.2' in protocaps:
223 223 # All clients are expected to support uncompressed data.
224 224 if prefer_uncompressed:
225 225 return HGTYPE2, util._noopengine(), {}
226 226
227 227 # Default as defined by wire protocol spec.
228 228 compformats = ['zlib', 'none']
229 229 for cap in protocaps:
230 230 if cap.startswith('comp='):
231 231 compformats = cap[5:].split(',')
232 232 break
233 233
234 234 # Now find an agreed upon compression format.
235 235 for engine in wireproto.supportedcompengines(ui, util.SERVERROLE):
236 236 if engine.wireprotosupport().name in compformats:
237 237 opts = {}
238 238 level = ui.configint('server', '%slevel' % engine.name())
239 239 if level is not None:
240 240 opts['level'] = level
241 241
242 242 return HGTYPE2, engine, opts
243 243
244 244 # No mutually supported compression format. Fall back to the
245 245 # legacy protocol.
246 246
247 247 # Don't allow untrusted settings because disabling compression or
248 248 # setting a very high compression level could lead to flooding
249 249 # the server's network or CPU.
250 250 opts = {'level': ui.configint('server', 'zliblevel')}
251 251 return HGTYPE, util.compengines['zlib'], opts
252 252
253 253 def _callhttp(repo, wsgireq, req, proto, cmd):
254 254 def genversion2(gen, engine, engineopts):
255 255 # application/mercurial-0.2 always sends a payload header
256 256 # identifying the compression engine.
257 257 name = engine.wireprotosupport().name
258 258 assert 0 < len(name) < 256
259 259 yield struct.pack('B', len(name))
260 260 yield name
261 261
262 262 for chunk in gen:
263 263 yield chunk
264 264
265 265 if not wireproto.commands.commandavailable(cmd, proto):
266 266 wsgireq.respond(HTTP_OK, HGERRTYPE,
267 267 body=_('requested wire protocol command is not '
268 268 'available over HTTP'))
269 269 return []
270 270
271 271 proto.checkperm(wireproto.commands[cmd].permission)
272 272
273 273 rsp = wireproto.dispatch(repo, proto, cmd)
274 274
275 275 if isinstance(rsp, bytes):
276 276 wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
277 277 return []
278 278 elif isinstance(rsp, wireprototypes.bytesresponse):
279 279 wsgireq.respond(HTTP_OK, HGTYPE, body=rsp.data)
280 280 return []
281 281 elif isinstance(rsp, wireprototypes.streamreslegacy):
282 282 gen = rsp.gen
283 283 wsgireq.respond(HTTP_OK, HGTYPE)
284 284 return gen
285 285 elif isinstance(rsp, wireprototypes.streamres):
286 286 gen = rsp.gen
287 287
288 288 # This code for compression should not be streamres specific. It
289 289 # is here because we only compress streamres at the moment.
290 290 mediatype, engine, engineopts = _httpresponsetype(
291 291 repo.ui, req, rsp.prefer_uncompressed)
292 292 gen = engine.compressstream(gen, engineopts)
293 293
294 294 if mediatype == HGTYPE2:
295 295 gen = genversion2(gen, engine, engineopts)
296 296
297 297 wsgireq.respond(HTTP_OK, mediatype)
298 298 return gen
299 299 elif isinstance(rsp, wireprototypes.pushres):
300 300 rsp = '%d\n%s' % (rsp.res, rsp.output)
301 301 wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
302 302 return []
303 303 elif isinstance(rsp, wireprototypes.pusherr):
304 304 rsp = '0\n%s\n' % rsp.res
305 305 wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
306 306 return []
307 307 elif isinstance(rsp, wireprototypes.ooberror):
308 308 rsp = rsp.message
309 309 wsgireq.respond(HTTP_OK, HGERRTYPE, body=rsp)
310 310 return []
311 311 raise error.ProgrammingError('hgweb.protocol internal failure', rsp)
312 312
313 313 def _handlehttperror(e, wsgireq, req):
314 314 """Called when an ErrorResponse is raised during HTTP request processing."""
315 315
316 316 # TODO This response body assumes the failed command was
317 317 # "unbundle." That assumption is not always valid.
318 318 wsgireq.respond(e, HGTYPE, body='0\n%s\n' % pycompat.bytestr(e))
319 319
320 320 return ''
321 321
322 322 def _sshv1respondbytes(fout, value):
323 323 """Send a bytes response for protocol version 1."""
324 324 fout.write('%d\n' % len(value))
325 325 fout.write(value)
326 326 fout.flush()
327 327
328 328 def _sshv1respondstream(fout, source):
329 329 write = fout.write
330 330 for chunk in source.gen:
331 331 write(chunk)
332 332 fout.flush()
333 333
334 334 def _sshv1respondooberror(fout, ferr, rsp):
335 335 ferr.write(b'%s\n-\n' % rsp)
336 336 ferr.flush()
337 337 fout.write(b'\n')
338 338 fout.flush()
339 339
340 340 class sshv1protocolhandler(wireprototypes.baseprotocolhandler):
341 341 """Handler for requests services via version 1 of SSH protocol."""
342 342 def __init__(self, ui, fin, fout):
343 343 self._ui = ui
344 344 self._fin = fin
345 345 self._fout = fout
346 346
347 347 @property
348 348 def name(self):
349 349 return wireprototypes.SSHV1
350 350
351 351 def getargs(self, args):
352 352 data = {}
353 353 keys = args.split()
354 354 for n in xrange(len(keys)):
355 355 argline = self._fin.readline()[:-1]
356 356 arg, l = argline.split()
357 357 if arg not in keys:
358 358 raise error.Abort(_("unexpected parameter %r") % arg)
359 359 if arg == '*':
360 360 star = {}
361 361 for k in xrange(int(l)):
362 362 argline = self._fin.readline()[:-1]
363 363 arg, l = argline.split()
364 364 val = self._fin.read(int(l))
365 365 star[arg] = val
366 366 data['*'] = star
367 367 else:
368 368 val = self._fin.read(int(l))
369 369 data[arg] = val
370 370 return [data[k] for k in keys]
371 371
372 372 def forwardpayload(self, fpout):
373 373 # We initially send an empty response. This tells the client it is
374 374 # OK to start sending data. If a client sees any other response, it
375 375 # interprets it as an error.
376 376 _sshv1respondbytes(self._fout, b'')
377 377
378 378 # The file is in the form:
379 379 #
380 380 # <chunk size>\n<chunk>
381 381 # ...
382 382 # 0\n
383 383 count = int(self._fin.readline())
384 384 while count:
385 385 fpout.write(self._fin.read(count))
386 386 count = int(self._fin.readline())
387 387
388 388 @contextlib.contextmanager
389 389 def mayberedirectstdio(self):
390 390 yield None
391 391
392 392 def client(self):
393 393 client = encoding.environ.get('SSH_CLIENT', '').split(' ', 1)[0]
394 394 return 'remote:ssh:' + client
395 395
396 396 def addcapabilities(self, repo, caps):
397 397 return caps
398 398
399 399 def checkperm(self, perm):
400 400 pass
401 401
402 402 class sshv2protocolhandler(sshv1protocolhandler):
403 403 """Protocol handler for version 2 of the SSH protocol."""
404 404
405 405 @property
406 406 def name(self):
407 407 return wireprototypes.SSHV2
408 408
409 409 def _runsshserver(ui, repo, fin, fout, ev):
410 410 # This function operates like a state machine of sorts. The following
411 411 # states are defined:
412 412 #
413 413 # protov1-serving
414 414 # Server is in protocol version 1 serving mode. Commands arrive on
415 415 # new lines. These commands are processed in this state, one command
416 416 # after the other.
417 417 #
418 418 # protov2-serving
419 419 # Server is in protocol version 2 serving mode.
420 420 #
421 421 # upgrade-initial
422 422 # The server is going to process an upgrade request.
423 423 #
424 424 # upgrade-v2-filter-legacy-handshake
425 425 # The protocol is being upgraded to version 2. The server is expecting
426 426 # the legacy handshake from version 1.
427 427 #
428 428 # upgrade-v2-finish
429 429 # The upgrade to version 2 of the protocol is imminent.
430 430 #
431 431 # shutdown
432 432 # The server is shutting down, possibly in reaction to a client event.
433 433 #
434 434 # And here are their transitions:
435 435 #
436 436 # protov1-serving -> shutdown
437 437 # When server receives an empty request or encounters another
438 438 # error.
439 439 #
440 440 # protov1-serving -> upgrade-initial
441 441 # An upgrade request line was seen.
442 442 #
443 443 # upgrade-initial -> upgrade-v2-filter-legacy-handshake
444 444 # Upgrade to version 2 in progress. Server is expecting to
445 445 # process a legacy handshake.
446 446 #
447 447 # upgrade-v2-filter-legacy-handshake -> shutdown
448 448 # Client did not fulfill upgrade handshake requirements.
449 449 #
450 450 # upgrade-v2-filter-legacy-handshake -> upgrade-v2-finish
451 451 # Client fulfilled version 2 upgrade requirements. Finishing that
452 452 # upgrade.
453 453 #
454 454 # upgrade-v2-finish -> protov2-serving
455 455 # Protocol upgrade to version 2 complete. Server can now speak protocol
456 456 # version 2.
457 457 #
458 458 # protov2-serving -> protov1-serving
459 459 # Ths happens by default since protocol version 2 is the same as
460 460 # version 1 except for the handshake.
461 461
462 462 state = 'protov1-serving'
463 463 proto = sshv1protocolhandler(ui, fin, fout)
464 464 protoswitched = False
465 465
466 466 while not ev.is_set():
467 467 if state == 'protov1-serving':
468 468 # Commands are issued on new lines.
469 469 request = fin.readline()[:-1]
470 470
471 471 # Empty lines signal to terminate the connection.
472 472 if not request:
473 473 state = 'shutdown'
474 474 continue
475 475
476 476 # It looks like a protocol upgrade request. Transition state to
477 477 # handle it.
478 478 if request.startswith(b'upgrade '):
479 479 if protoswitched:
480 480 _sshv1respondooberror(fout, ui.ferr,
481 481 b'cannot upgrade protocols multiple '
482 482 b'times')
483 483 state = 'shutdown'
484 484 continue
485 485
486 486 state = 'upgrade-initial'
487 487 continue
488 488
489 489 available = wireproto.commands.commandavailable(request, proto)
490 490
491 491 # This command isn't available. Send an empty response and go
492 492 # back to waiting for a new command.
493 493 if not available:
494 494 _sshv1respondbytes(fout, b'')
495 495 continue
496 496
497 497 rsp = wireproto.dispatch(repo, proto, request)
498 498
499 499 if isinstance(rsp, bytes):
500 500 _sshv1respondbytes(fout, rsp)
501 501 elif isinstance(rsp, wireprototypes.bytesresponse):
502 502 _sshv1respondbytes(fout, rsp.data)
503 503 elif isinstance(rsp, wireprototypes.streamres):
504 504 _sshv1respondstream(fout, rsp)
505 505 elif isinstance(rsp, wireprototypes.streamreslegacy):
506 506 _sshv1respondstream(fout, rsp)
507 507 elif isinstance(rsp, wireprototypes.pushres):
508 508 _sshv1respondbytes(fout, b'')
509 509 _sshv1respondbytes(fout, b'%d' % rsp.res)
510 510 elif isinstance(rsp, wireprototypes.pusherr):
511 511 _sshv1respondbytes(fout, rsp.res)
512 512 elif isinstance(rsp, wireprototypes.ooberror):
513 513 _sshv1respondooberror(fout, ui.ferr, rsp.message)
514 514 else:
515 515 raise error.ProgrammingError('unhandled response type from '
516 516 'wire protocol command: %s' % rsp)
517 517
518 518 # For now, protocol version 2 serving just goes back to version 1.
519 519 elif state == 'protov2-serving':
520 520 state = 'protov1-serving'
521 521 continue
522 522
523 523 elif state == 'upgrade-initial':
524 524 # We should never transition into this state if we've switched
525 525 # protocols.
526 526 assert not protoswitched
527 527 assert proto.name == wireprototypes.SSHV1
528 528
529 529 # Expected: upgrade <token> <capabilities>
530 530 # If we get something else, the request is malformed. It could be
531 531 # from a future client that has altered the upgrade line content.
532 532 # We treat this as an unknown command.
533 533 try:
534 534 token, caps = request.split(b' ')[1:]
535 535 except ValueError:
536 536 _sshv1respondbytes(fout, b'')
537 537 state = 'protov1-serving'
538 538 continue
539 539
540 540 # Send empty response if we don't support upgrading protocols.
541 541 if not ui.configbool('experimental', 'sshserver.support-v2'):
542 542 _sshv1respondbytes(fout, b'')
543 543 state = 'protov1-serving'
544 544 continue
545 545
546 546 try:
547 547 caps = urlreq.parseqs(caps)
548 548 except ValueError:
549 549 _sshv1respondbytes(fout, b'')
550 550 state = 'protov1-serving'
551 551 continue
552 552
553 553 # We don't see an upgrade request to protocol version 2. Ignore
554 554 # the upgrade request.
555 555 wantedprotos = caps.get(b'proto', [b''])[0]
556 556 if SSHV2 not in wantedprotos:
557 557 _sshv1respondbytes(fout, b'')
558 558 state = 'protov1-serving'
559 559 continue
560 560
561 561 # It looks like we can honor this upgrade request to protocol 2.
562 562 # Filter the rest of the handshake protocol request lines.
563 563 state = 'upgrade-v2-filter-legacy-handshake'
564 564 continue
565 565
566 566 elif state == 'upgrade-v2-filter-legacy-handshake':
567 567 # Client should have sent legacy handshake after an ``upgrade``
568 568 # request. Expected lines:
569 569 #
570 570 # hello
571 571 # between
572 572 # pairs 81
573 573 # 0000...-0000...
574 574
575 575 ok = True
576 576 for line in (b'hello', b'between', b'pairs 81'):
577 577 request = fin.readline()[:-1]
578 578
579 579 if request != line:
580 580 _sshv1respondooberror(fout, ui.ferr,
581 581 b'malformed handshake protocol: '
582 582 b'missing %s' % line)
583 583 ok = False
584 584 state = 'shutdown'
585 585 break
586 586
587 587 if not ok:
588 588 continue
589 589
590 590 request = fin.read(81)
591 591 if request != b'%s-%s' % (b'0' * 40, b'0' * 40):
592 592 _sshv1respondooberror(fout, ui.ferr,
593 593 b'malformed handshake protocol: '
594 594 b'missing between argument value')
595 595 state = 'shutdown'
596 596 continue
597 597
598 598 state = 'upgrade-v2-finish'
599 599 continue
600 600
601 601 elif state == 'upgrade-v2-finish':
602 602 # Send the upgrade response.
603 603 fout.write(b'upgraded %s %s\n' % (token, SSHV2))
604 604 servercaps = wireproto.capabilities(repo, proto)
605 605 rsp = b'capabilities: %s' % servercaps.data
606 606 fout.write(b'%d\n%s\n' % (len(rsp), rsp))
607 607 fout.flush()
608 608
609 609 proto = sshv2protocolhandler(ui, fin, fout)
610 610 protoswitched = True
611 611
612 612 state = 'protov2-serving'
613 613 continue
614 614
615 615 elif state == 'shutdown':
616 616 break
617 617
618 618 else:
619 619 raise error.ProgrammingError('unhandled ssh server state: %s' %
620 620 state)
621 621
622 622 class sshserver(object):
623 623 def __init__(self, ui, repo, logfh=None):
624 624 self._ui = ui
625 625 self._repo = repo
626 626 self._fin = ui.fin
627 627 self._fout = ui.fout
628 628
629 629 # Log write I/O to stdout and stderr if configured.
630 630 if logfh:
631 631 self._fout = util.makeloggingfileobject(
632 632 logfh, self._fout, 'o', logdata=True)
633 633 ui.ferr = util.makeloggingfileobject(
634 634 logfh, ui.ferr, 'e', logdata=True)
635 635
636 636 hook.redirect(True)
637 637 ui.fout = repo.ui.fout = ui.ferr
638 638
639 639 # Prevent insertion/deletion of CRs
640 640 util.setbinary(self._fin)
641 641 util.setbinary(self._fout)
642 642
643 643 def serve_forever(self):
644 644 self.serveuntil(threading.Event())
645 645 sys.exit(0)
646 646
647 647 def serveuntil(self, ev):
648 648 """Serve until a threading.Event is set."""
649 649 _runsshserver(self._ui, self._repo, self._fin, self._fout, ev)
General Comments 0
You need to be logged in to leave comments. Login now