##// END OF EJS Templates
hgweb: create dedicated type for WSGI responses...
Gregory Szorc -
r36877:a88d68dc default
parent child Browse files
Show More
@@ -1,438 +1,440
1 1 # hgweb/hgweb_mod.py - Web interface for a repository.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import contextlib
12 12 import os
13 13
14 14 from .common import (
15 15 ErrorResponse,
16 16 HTTP_BAD_REQUEST,
17 17 HTTP_NOT_FOUND,
18 18 HTTP_NOT_MODIFIED,
19 19 HTTP_OK,
20 20 HTTP_SERVER_ERROR,
21 21 caching,
22 22 cspvalues,
23 23 permhooks,
24 24 )
25 25
26 26 from .. import (
27 27 encoding,
28 28 error,
29 29 formatter,
30 30 hg,
31 31 hook,
32 32 profiling,
33 33 pycompat,
34 34 repoview,
35 35 templatefilters,
36 36 templater,
37 37 ui as uimod,
38 38 util,
39 39 wireprotoserver,
40 40 )
41 41
42 42 from . import (
43 43 request as requestmod,
44 44 webcommands,
45 45 webutil,
46 46 wsgicgi,
47 47 )
48 48
49 49 archivespecs = util.sortdict((
50 50 ('zip', ('application/zip', 'zip', '.zip', None)),
51 51 ('gz', ('application/x-gzip', 'tgz', '.tar.gz', None)),
52 52 ('bz2', ('application/x-bzip2', 'tbz2', '.tar.bz2', None)),
53 53 ))
54 54
55 55 def getstyle(req, configfn, templatepath):
56 56 fromreq = req.form.get('style', [None])[0]
57 57 styles = (
58 58 fromreq,
59 59 configfn('web', 'style'),
60 60 'paper',
61 61 )
62 62 return styles, templater.stylemap(styles, templatepath)
63 63
64 64 def makebreadcrumb(url, prefix=''):
65 65 '''Return a 'URL breadcrumb' list
66 66
67 67 A 'URL breadcrumb' is a list of URL-name pairs,
68 68 corresponding to each of the path items on a URL.
69 69 This can be used to create path navigation entries.
70 70 '''
71 71 if url.endswith('/'):
72 72 url = url[:-1]
73 73 if prefix:
74 74 url = '/' + prefix + url
75 75 relpath = url
76 76 if relpath.startswith('/'):
77 77 relpath = relpath[1:]
78 78
79 79 breadcrumb = []
80 80 urlel = url
81 81 pathitems = [''] + relpath.split('/')
82 82 for pathel in reversed(pathitems):
83 83 if not pathel or not urlel:
84 84 break
85 85 breadcrumb.append({'url': urlel, 'name': pathel})
86 86 urlel = os.path.dirname(urlel)
87 87 return reversed(breadcrumb)
88 88
89 89 class requestcontext(object):
90 90 """Holds state/context for an individual request.
91 91
92 92 Servers can be multi-threaded. Holding state on the WSGI application
93 93 is prone to race conditions. Instances of this class exist to hold
94 94 mutable and race-free state for requests.
95 95 """
96 96 def __init__(self, app, repo):
97 97 self.repo = repo
98 98 self.reponame = app.reponame
99 99
100 100 self.archivespecs = archivespecs
101 101
102 102 self.maxchanges = self.configint('web', 'maxchanges')
103 103 self.stripecount = self.configint('web', 'stripes')
104 104 self.maxshortchanges = self.configint('web', 'maxshortchanges')
105 105 self.maxfiles = self.configint('web', 'maxfiles')
106 106 self.allowpull = self.configbool('web', 'allow-pull')
107 107
108 108 # we use untrusted=False to prevent a repo owner from using
109 109 # web.templates in .hg/hgrc to get access to any file readable
110 110 # by the user running the CGI script
111 111 self.templatepath = self.config('web', 'templates', untrusted=False)
112 112
113 113 # This object is more expensive to build than simple config values.
114 114 # It is shared across requests. The app will replace the object
115 115 # if it is updated. Since this is a reference and nothing should
116 116 # modify the underlying object, it should be constant for the lifetime
117 117 # of the request.
118 118 self.websubtable = app.websubtable
119 119
120 120 self.csp, self.nonce = cspvalues(self.repo.ui)
121 121
122 122 # Trust the settings from the .hg/hgrc files by default.
123 123 def config(self, section, name, default=uimod._unset, untrusted=True):
124 124 return self.repo.ui.config(section, name, default,
125 125 untrusted=untrusted)
126 126
127 127 def configbool(self, section, name, default=uimod._unset, untrusted=True):
128 128 return self.repo.ui.configbool(section, name, default,
129 129 untrusted=untrusted)
130 130
131 131 def configint(self, section, name, default=uimod._unset, untrusted=True):
132 132 return self.repo.ui.configint(section, name, default,
133 133 untrusted=untrusted)
134 134
135 135 def configlist(self, section, name, default=uimod._unset, untrusted=True):
136 136 return self.repo.ui.configlist(section, name, default,
137 137 untrusted=untrusted)
138 138
139 139 def archivelist(self, nodeid):
140 140 allowed = self.configlist('web', 'allow_archive')
141 141 for typ, spec in self.archivespecs.iteritems():
142 142 if typ in allowed or self.configbool('web', 'allow%s' % typ):
143 143 yield {'type': typ, 'extension': spec[2], 'node': nodeid}
144 144
145 145 def templater(self, wsgireq, req):
146 146 # determine scheme, port and server name
147 147 # this is needed to create absolute urls
148 148 logourl = self.config('web', 'logourl')
149 149 logoimg = self.config('web', 'logoimg')
150 150 staticurl = (self.config('web', 'staticurl')
151 151 or req.apppath + '/static/')
152 152 if not staticurl.endswith('/'):
153 153 staticurl += '/'
154 154
155 155 # some functions for the templater
156 156
157 157 def motd(**map):
158 158 yield self.config('web', 'motd')
159 159
160 160 # figure out which style to use
161 161
162 162 vars = {}
163 163 styles, (style, mapfile) = getstyle(wsgireq, self.config,
164 164 self.templatepath)
165 165 if style == styles[0]:
166 166 vars['style'] = style
167 167
168 168 sessionvars = webutil.sessionvars(vars, '?')
169 169
170 170 if not self.reponame:
171 171 self.reponame = (self.config('web', 'name', '')
172 172 or wsgireq.env.get('REPO_NAME')
173 173 or req.apppath or self.repo.root)
174 174
175 175 def websubfilter(text):
176 176 return templatefilters.websub(text, self.websubtable)
177 177
178 178 # create the templater
179 179 # TODO: export all keywords: defaults = templatekw.keywords.copy()
180 180 defaults = {
181 181 'url': req.apppath + '/',
182 182 'logourl': logourl,
183 183 'logoimg': logoimg,
184 184 'staticurl': staticurl,
185 185 'urlbase': req.advertisedbaseurl,
186 186 'repo': self.reponame,
187 187 'encoding': encoding.encoding,
188 188 'motd': motd,
189 189 'sessionvars': sessionvars,
190 190 'pathdef': makebreadcrumb(req.apppath),
191 191 'style': style,
192 192 'nonce': self.nonce,
193 193 }
194 194 tres = formatter.templateresources(self.repo.ui, self.repo)
195 195 tmpl = templater.templater.frommapfile(mapfile,
196 196 filters={'websub': websubfilter},
197 197 defaults=defaults,
198 198 resources=tres)
199 199 return tmpl
200 200
201 201
202 202 class hgweb(object):
203 203 """HTTP server for individual repositories.
204 204
205 205 Instances of this class serve HTTP responses for a particular
206 206 repository.
207 207
208 208 Instances are typically used as WSGI applications.
209 209
210 210 Some servers are multi-threaded. On these servers, there may
211 211 be multiple active threads inside __call__.
212 212 """
213 213 def __init__(self, repo, name=None, baseui=None):
214 214 if isinstance(repo, str):
215 215 if baseui:
216 216 u = baseui.copy()
217 217 else:
218 218 u = uimod.ui.load()
219 219 r = hg.repository(u, repo)
220 220 else:
221 221 # we trust caller to give us a private copy
222 222 r = repo
223 223
224 224 r.ui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
225 225 r.baseui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
226 226 r.ui.setconfig('ui', 'nontty', 'true', 'hgweb')
227 227 r.baseui.setconfig('ui', 'nontty', 'true', 'hgweb')
228 228 # resolve file patterns relative to repo root
229 229 r.ui.setconfig('ui', 'forcecwd', r.root, 'hgweb')
230 230 r.baseui.setconfig('ui', 'forcecwd', r.root, 'hgweb')
231 231 # displaying bundling progress bar while serving feel wrong and may
232 232 # break some wsgi implementation.
233 233 r.ui.setconfig('progress', 'disable', 'true', 'hgweb')
234 234 r.baseui.setconfig('progress', 'disable', 'true', 'hgweb')
235 235 self._repos = [hg.cachedlocalrepo(self._webifyrepo(r))]
236 236 self._lastrepo = self._repos[0]
237 237 hook.redirect(True)
238 238 self.reponame = name
239 239
240 240 def _webifyrepo(self, repo):
241 241 repo = getwebview(repo)
242 242 self.websubtable = webutil.getwebsubs(repo)
243 243 return repo
244 244
245 245 @contextlib.contextmanager
246 246 def _obtainrepo(self):
247 247 """Obtain a repo unique to the caller.
248 248
249 249 Internally we maintain a stack of cachedlocalrepo instances
250 250 to be handed out. If one is available, we pop it and return it,
251 251 ensuring it is up to date in the process. If one is not available,
252 252 we clone the most recently used repo instance and return it.
253 253
254 254 It is currently possible for the stack to grow without bounds
255 255 if the server allows infinite threads. However, servers should
256 256 have a thread limit, thus establishing our limit.
257 257 """
258 258 if self._repos:
259 259 cached = self._repos.pop()
260 260 r, created = cached.fetch()
261 261 else:
262 262 cached = self._lastrepo.copy()
263 263 r, created = cached.fetch()
264 264 if created:
265 265 r = self._webifyrepo(r)
266 266
267 267 self._lastrepo = cached
268 268 self.mtime = cached.mtime
269 269 try:
270 270 yield r
271 271 finally:
272 272 self._repos.append(cached)
273 273
274 274 def run(self):
275 275 """Start a server from CGI environment.
276 276
277 277 Modern servers should be using WSGI and should avoid this
278 278 method, if possible.
279 279 """
280 280 if not encoding.environ.get('GATEWAY_INTERFACE',
281 281 '').startswith("CGI/1."):
282 282 raise RuntimeError("This function is only intended to be "
283 283 "called while running as a CGI script.")
284 284 wsgicgi.launch(self)
285 285
286 286 def __call__(self, env, respond):
287 287 """Run the WSGI application.
288 288
289 289 This may be called by multiple threads.
290 290 """
291 291 req = requestmod.wsgirequest(env, respond)
292 292 return self.run_wsgi(req)
293 293
294 294 def run_wsgi(self, wsgireq):
295 295 """Internal method to run the WSGI application.
296 296
297 297 This is typically only called by Mercurial. External consumers
298 298 should be using instances of this class as the WSGI application.
299 299 """
300 300 with self._obtainrepo() as repo:
301 301 profile = repo.ui.configbool('profiling', 'enabled')
302 302 with profiling.profile(repo.ui, enabled=profile):
303 303 for r in self._runwsgi(wsgireq, repo):
304 304 yield r
305 305
306 306 def _runwsgi(self, wsgireq, repo):
307 307 req = wsgireq.req
308 res = wsgireq.res
308 309 rctx = requestcontext(self, repo)
309 310
310 311 # This state is global across all threads.
311 312 encoding.encoding = rctx.config('web', 'encoding')
312 313 rctx.repo.ui.environ = wsgireq.env
313 314
314 315 if rctx.csp:
315 316 # hgwebdir may have added CSP header. Since we generate our own,
316 317 # replace it.
317 318 wsgireq.headers = [h for h in wsgireq.headers
318 319 if h[0] != 'Content-Security-Policy']
319 320 wsgireq.headers.append(('Content-Security-Policy', rctx.csp))
321 res.headers['Content-Security-Policy'] = rctx.csp
320 322
321 handled, res = wireprotoserver.handlewsgirequest(
322 rctx, wsgireq, req, self.check_perm)
323 handled = wireprotoserver.handlewsgirequest(
324 rctx, wsgireq, req, res, self.check_perm)
323 325 if handled:
324 return res
326 return res.sendresponse()
325 327
326 328 if req.havepathinfo:
327 329 query = req.dispatchpath
328 330 else:
329 331 query = req.querystring.partition('&')[0].partition(';')[0]
330 332
331 333 # translate user-visible url structure to internal structure
332 334
333 335 args = query.split('/', 2)
334 336 if 'cmd' not in wsgireq.form and args and args[0]:
335 337 cmd = args.pop(0)
336 338 style = cmd.rfind('-')
337 339 if style != -1:
338 340 wsgireq.form['style'] = [cmd[:style]]
339 341 cmd = cmd[style + 1:]
340 342
341 343 # avoid accepting e.g. style parameter as command
342 344 if util.safehasattr(webcommands, cmd):
343 345 wsgireq.form['cmd'] = [cmd]
344 346
345 347 if cmd == 'static':
346 348 wsgireq.form['file'] = ['/'.join(args)]
347 349 else:
348 350 if args and args[0]:
349 351 node = args.pop(0).replace('%2F', '/')
350 352 wsgireq.form['node'] = [node]
351 353 if args:
352 354 wsgireq.form['file'] = args
353 355
354 356 ua = req.headers.get('User-Agent', '')
355 357 if cmd == 'rev' and 'mercurial' in ua:
356 358 wsgireq.form['style'] = ['raw']
357 359
358 360 if cmd == 'archive':
359 361 fn = wsgireq.form['node'][0]
360 362 for type_, spec in rctx.archivespecs.iteritems():
361 363 ext = spec[2]
362 364 if fn.endswith(ext):
363 365 wsgireq.form['node'] = [fn[:-len(ext)]]
364 366 wsgireq.form['type'] = [type_]
365 367 else:
366 368 cmd = wsgireq.form.get('cmd', [''])[0]
367 369
368 370 # process the web interface request
369 371
370 372 try:
371 373 tmpl = rctx.templater(wsgireq, req)
372 374 ctype = tmpl('mimetype', encoding=encoding.encoding)
373 375 ctype = templater.stringify(ctype)
374 376
375 377 # check read permissions non-static content
376 378 if cmd != 'static':
377 379 self.check_perm(rctx, wsgireq, None)
378 380
379 381 if cmd == '':
380 382 wsgireq.form['cmd'] = [tmpl.cache['default']]
381 383 cmd = wsgireq.form['cmd'][0]
382 384
383 385 # Don't enable caching if using a CSP nonce because then it wouldn't
384 386 # be a nonce.
385 387 if rctx.configbool('web', 'cache') and not rctx.nonce:
386 388 caching(self, wsgireq) # sets ETag header or raises NOT_MODIFIED
387 389 if cmd not in webcommands.__all__:
388 390 msg = 'no such method: %s' % cmd
389 391 raise ErrorResponse(HTTP_BAD_REQUEST, msg)
390 392 elif cmd == 'file' and 'raw' in wsgireq.form.get('style', []):
391 393 rctx.ctype = ctype
392 394 content = webcommands.rawfile(rctx, wsgireq, tmpl)
393 395 else:
394 396 content = getattr(webcommands, cmd)(rctx, wsgireq, tmpl)
395 397 wsgireq.respond(HTTP_OK, ctype)
396 398
397 399 return content
398 400
399 401 except (error.LookupError, error.RepoLookupError) as err:
400 402 wsgireq.respond(HTTP_NOT_FOUND, ctype)
401 403 msg = pycompat.bytestr(err)
402 404 if (util.safehasattr(err, 'name') and
403 405 not isinstance(err, error.ManifestLookupError)):
404 406 msg = 'revision not found: %s' % err.name
405 407 return tmpl('error', error=msg)
406 408 except (error.RepoError, error.RevlogError) as inst:
407 409 wsgireq.respond(HTTP_SERVER_ERROR, ctype)
408 410 return tmpl('error', error=pycompat.bytestr(inst))
409 411 except ErrorResponse as inst:
410 412 wsgireq.respond(inst, ctype)
411 413 if inst.code == HTTP_NOT_MODIFIED:
412 414 # Not allowed to return a body on a 304
413 415 return ['']
414 416 return tmpl('error', error=pycompat.bytestr(inst))
415 417
416 418 def check_perm(self, rctx, req, op):
417 419 for permhook in permhooks:
418 420 permhook(rctx, req, op)
419 421
420 422 def getwebview(repo):
421 423 """The 'web.view' config controls changeset filter to hgweb. Possible
422 424 values are ``served``, ``visible`` and ``all``. Default is ``served``.
423 425 The ``served`` filter only shows changesets that can be pulled from the
424 426 hgweb instance. The``visible`` filter includes secret changesets but
425 427 still excludes "hidden" one.
426 428
427 429 See the repoview module for details.
428 430
429 431 The option has been around undocumented since Mercurial 2.5, but no
430 432 user ever asked about it. So we better keep it undocumented for now."""
431 433 # experimental config: web.view
432 434 viewconfig = repo.ui.config('web', 'view', untrusted=True)
433 435 if viewconfig == 'all':
434 436 return repo.unfiltered()
435 437 elif viewconfig in repoview.filtertable:
436 438 return repo.filtered(viewconfig)
437 439 else:
438 440 return repo.filtered('served')
@@ -1,339 +1,463
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import socket
13 13 import wsgiref.headers as wsgiheaders
14 14 #import wsgiref.validate
15 15
16 16 from .common import (
17 17 ErrorResponse,
18 18 HTTP_NOT_MODIFIED,
19 19 statusmessage,
20 20 )
21 21
22 22 from ..thirdparty import (
23 23 attr,
24 24 )
25 25 from .. import (
26 error,
26 27 pycompat,
27 28 util,
28 29 )
29 30
30 31 @attr.s(frozen=True)
31 32 class parsedrequest(object):
32 33 """Represents a parsed WSGI request.
33 34
34 35 Contains both parsed parameters as well as a handle on the input stream.
35 36 """
36 37
37 38 # Request method.
38 39 method = attr.ib()
39 40 # Full URL for this request.
40 41 url = attr.ib()
41 42 # URL without any path components. Just <proto>://<host><port>.
42 43 baseurl = attr.ib()
43 44 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
44 45 # of HTTP: Host header for hostname. This is likely what clients used.
45 46 advertisedurl = attr.ib()
46 47 advertisedbaseurl = attr.ib()
47 48 # WSGI application path.
48 49 apppath = attr.ib()
49 50 # List of path parts to be used for dispatch.
50 51 dispatchparts = attr.ib()
51 52 # URL path component (no query string) used for dispatch.
52 53 dispatchpath = attr.ib()
53 54 # Whether there is a path component to this request. This can be true
54 55 # when ``dispatchpath`` is empty due to REPO_NAME muckery.
55 56 havepathinfo = attr.ib()
56 57 # Raw query string (part after "?" in URL).
57 58 querystring = attr.ib()
58 59 # List of 2-tuples of query string arguments.
59 60 querystringlist = attr.ib()
60 61 # Dict of query string arguments. Values are lists with at least 1 item.
61 62 querystringdict = attr.ib()
62 63 # wsgiref.headers.Headers instance. Operates like a dict with case
63 64 # insensitive keys.
64 65 headers = attr.ib()
65 66 # Request body input stream.
66 67 bodyfh = attr.ib()
67 68
68 69 def parserequestfromenv(env, bodyfh):
69 70 """Parse URL components from environment variables.
70 71
71 72 WSGI defines request attributes via environment variables. This function
72 73 parses the environment variables into a data structure.
73 74 """
74 75 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
75 76
76 77 # We first validate that the incoming object conforms with the WSGI spec.
77 78 # We only want to be dealing with spec-conforming WSGI implementations.
78 79 # TODO enable this once we fix internal violations.
79 80 #wsgiref.validate.check_environ(env)
80 81
81 82 # PEP-0333 states that environment keys and values are native strings
82 83 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
83 84 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
84 85 # in Mercurial, so mass convert string keys and values to bytes.
85 86 if pycompat.ispy3:
86 87 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
87 88 env = {k: v.encode('latin-1') if isinstance(v, str) else v
88 89 for k, v in env.iteritems()}
89 90
90 91 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
91 92 # the environment variables.
92 93 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
93 94 # how URLs are reconstructed.
94 95 fullurl = env['wsgi.url_scheme'] + '://'
95 96 advertisedfullurl = fullurl
96 97
97 98 def addport(s):
98 99 if env['wsgi.url_scheme'] == 'https':
99 100 if env['SERVER_PORT'] != '443':
100 101 s += ':' + env['SERVER_PORT']
101 102 else:
102 103 if env['SERVER_PORT'] != '80':
103 104 s += ':' + env['SERVER_PORT']
104 105
105 106 return s
106 107
107 108 if env.get('HTTP_HOST'):
108 109 fullurl += env['HTTP_HOST']
109 110 else:
110 111 fullurl += env['SERVER_NAME']
111 112 fullurl = addport(fullurl)
112 113
113 114 advertisedfullurl += env['SERVER_NAME']
114 115 advertisedfullurl = addport(advertisedfullurl)
115 116
116 117 baseurl = fullurl
117 118 advertisedbaseurl = advertisedfullurl
118 119
119 120 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
120 121 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
121 122 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
122 123 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
123 124
124 125 if env.get('QUERY_STRING'):
125 126 fullurl += '?' + env['QUERY_STRING']
126 127 advertisedfullurl += '?' + env['QUERY_STRING']
127 128
128 129 # When dispatching requests, we look at the URL components (PATH_INFO
129 130 # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
130 131 # has the concept of "virtual" repositories. This is defined via REPO_NAME.
131 132 # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
132 133 # root. We also exclude its path components from PATH_INFO when resolving
133 134 # the dispatch path.
134 135
135 136 apppath = env['SCRIPT_NAME']
136 137
137 138 if env.get('REPO_NAME'):
138 139 if not apppath.endswith('/'):
139 140 apppath += '/'
140 141
141 142 apppath += env.get('REPO_NAME')
142 143
143 144 if 'PATH_INFO' in env:
144 145 dispatchparts = env['PATH_INFO'].strip('/').split('/')
145 146
146 147 # Strip out repo parts.
147 148 repoparts = env.get('REPO_NAME', '').split('/')
148 149 if dispatchparts[:len(repoparts)] == repoparts:
149 150 dispatchparts = dispatchparts[len(repoparts):]
150 151 else:
151 152 dispatchparts = []
152 153
153 154 dispatchpath = '/'.join(dispatchparts)
154 155
155 156 querystring = env.get('QUERY_STRING', '')
156 157
157 158 # We store as a list so we have ordering information. We also store as
158 159 # a dict to facilitate fast lookup.
159 160 querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)
160 161
161 162 querystringdict = {}
162 163 for k, v in querystringlist:
163 164 if k in querystringdict:
164 165 querystringdict[k].append(v)
165 166 else:
166 167 querystringdict[k] = [v]
167 168
168 169 # HTTP_* keys contain HTTP request headers. The Headers structure should
169 170 # perform case normalization for us. We just rewrite underscore to dash
170 171 # so keys match what likely went over the wire.
171 172 headers = []
172 173 for k, v in env.iteritems():
173 174 if k.startswith('HTTP_'):
174 175 headers.append((k[len('HTTP_'):].replace('_', '-'), v))
175 176
176 177 headers = wsgiheaders.Headers(headers)
177 178
178 179 # This is kind of a lie because the HTTP header wasn't explicitly
179 180 # sent. But for all intents and purposes it should be OK to lie about
180 181 # this, since a consumer will either either value to determine how many
181 182 # bytes are available to read.
182 183 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
183 184 headers['Content-Length'] = env['CONTENT_LENGTH']
184 185
185 186 # TODO do this once we remove wsgirequest.inp, otherwise we could have
186 187 # multiple readers from the underlying input stream.
187 188 #bodyfh = env['wsgi.input']
188 189 #if 'Content-Length' in headers:
189 190 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
190 191
191 192 return parsedrequest(method=env['REQUEST_METHOD'],
192 193 url=fullurl, baseurl=baseurl,
193 194 advertisedurl=advertisedfullurl,
194 195 advertisedbaseurl=advertisedbaseurl,
195 196 apppath=apppath,
196 197 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
197 198 havepathinfo='PATH_INFO' in env,
198 199 querystring=querystring,
199 200 querystringlist=querystringlist,
200 201 querystringdict=querystringdict,
201 202 headers=headers,
202 203 bodyfh=bodyfh)
203 204
205 class wsgiresponse(object):
206 """Represents a response to a WSGI request.
207
208 A response consists of a status line, headers, and a body.
209
210 Consumers must populate the ``status`` and ``headers`` fields and
211 make a call to a ``setbody*()`` method before the response can be
212 issued.
213
214 When it is time to start sending the response over the wire,
215 ``sendresponse()`` is called. It handles emitting the header portion
216 of the response message. It then yields chunks of body data to be
217 written to the peer. Typically, the WSGI application itself calls
218 and returns the value from ``sendresponse()``.
219 """
220
221 def __init__(self, req, startresponse):
222 """Create an empty response tied to a specific request.
223
224 ``req`` is a ``parsedrequest``. ``startresponse`` is the
225 ``start_response`` function passed to the WSGI application.
226 """
227 self._req = req
228 self._startresponse = startresponse
229
230 self.status = None
231 self.headers = wsgiheaders.Headers([])
232
233 self._bodybytes = None
234 self._bodygen = None
235 self._started = False
236
237 def setbodybytes(self, b):
238 """Define the response body as static bytes."""
239 if self._bodybytes is not None or self._bodygen is not None:
240 raise error.ProgrammingError('cannot define body multiple times')
241
242 self._bodybytes = b
243 self.headers['Content-Length'] = '%d' % len(b)
244
245 def setbodygen(self, gen):
246 """Define the response body as a generator of bytes."""
247 if self._bodybytes is not None or self._bodygen is not None:
248 raise error.ProgrammingError('cannot define body multiple times')
249
250 self._bodygen = gen
251
252 def sendresponse(self):
253 """Send the generated response to the client.
254
255 Before this is called, ``status`` must be set and one of
256 ``setbodybytes()`` or ``setbodygen()`` must be called.
257
258 Calling this method multiple times is not allowed.
259 """
260 if self._started:
261 raise error.ProgrammingError('sendresponse() called multiple times')
262
263 self._started = True
264
265 if not self.status:
266 raise error.ProgrammingError('status line not defined')
267
268 if self._bodybytes is None and self._bodygen is None:
269 raise error.ProgrammingError('response body not defined')
270
271 # Various HTTP clients (notably httplib) won't read the HTTP response
272 # until the HTTP request has been sent in full. If servers (us) send a
273 # response before the HTTP request has been fully sent, the connection
274 # may deadlock because neither end is reading.
275 #
276 # We work around this by "draining" the request data before
277 # sending any response in some conditions.
278 drain = False
279 close = False
280
281 # If the client sent Expect: 100-continue, we assume it is smart enough
282 # to deal with the server sending a response before reading the request.
283 # (httplib doesn't do this.)
284 if self._req.headers.get('Expect', '').lower() == '100-continue':
285 pass
286 # Only tend to request methods that have bodies. Strictly speaking,
287 # we should sniff for a body. But this is fine for our existing
288 # WSGI applications.
289 elif self._req.method not in ('POST', 'PUT'):
290 pass
291 else:
292 # If we don't know how much data to read, there's no guarantee
293 # that we can drain the request responsibly. The WSGI
294 # specification only says that servers *should* ensure the
295 # input stream doesn't overrun the actual request. So there's
296 # no guarantee that reading until EOF won't corrupt the stream
297 # state.
298 if not isinstance(self._req.bodyfh, util.cappedreader):
299 close = True
300 else:
301 # We /could/ only drain certain HTTP response codes. But 200 and
302 # non-200 wire protocol responses both require draining. Since
303 # we have a capped reader in place for all situations where we
304 # drain, it is safe to read from that stream. We'll either do
305 # a drain or no-op if we're already at EOF.
306 drain = True
307
308 if close:
309 self.headers['Connection'] = 'Close'
310
311 if drain:
312 assert isinstance(self._req.bodyfh, util.cappedreader)
313 while True:
314 chunk = self._req.bodyfh.read(32768)
315 if not chunk:
316 break
317
318 self._startresponse(pycompat.sysstr(self.status), self.headers.items())
319 if self._bodybytes:
320 yield self._bodybytes
321 elif self._bodygen:
322 for chunk in self._bodygen:
323 yield chunk
324 else:
325 error.ProgrammingError('do not know how to send body')
326
204 327 class wsgirequest(object):
205 328 """Higher-level API for a WSGI request.
206 329
207 330 WSGI applications are invoked with 2 arguments. They are used to
208 331 instantiate instances of this class, which provides higher-level APIs
209 332 for obtaining request parameters, writing HTTP output, etc.
210 333 """
211 334 def __init__(self, wsgienv, start_response):
212 335 version = wsgienv[r'wsgi.version']
213 336 if (version < (1, 0)) or (version >= (2, 0)):
214 337 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
215 338 % version)
216 339
217 340 inp = wsgienv[r'wsgi.input']
218 341
219 342 if r'HTTP_CONTENT_LENGTH' in wsgienv:
220 343 inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
221 344 elif r'CONTENT_LENGTH' in wsgienv:
222 345 inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
223 346
224 347 self.err = wsgienv[r'wsgi.errors']
225 348 self.threaded = wsgienv[r'wsgi.multithread']
226 349 self.multiprocess = wsgienv[r'wsgi.multiprocess']
227 350 self.run_once = wsgienv[r'wsgi.run_once']
228 351 self.env = wsgienv
229 352 self.req = parserequestfromenv(wsgienv, inp)
230 353 self.form = self.req.querystringdict
354 self.res = wsgiresponse(self.req, start_response)
231 355 self._start_response = start_response
232 356 self.server_write = None
233 357 self.headers = []
234 358
235 359 def respond(self, status, type, filename=None, body=None):
236 360 if not isinstance(type, str):
237 361 type = pycompat.sysstr(type)
238 362 if self._start_response is not None:
239 363 self.headers.append((r'Content-Type', type))
240 364 if filename:
241 365 filename = (filename.rpartition('/')[-1]
242 366 .replace('\\', '\\\\').replace('"', '\\"'))
243 367 self.headers.append(('Content-Disposition',
244 368 'inline; filename="%s"' % filename))
245 369 if body is not None:
246 370 self.headers.append((r'Content-Length', str(len(body))))
247 371
248 372 for k, v in self.headers:
249 373 if not isinstance(v, str):
250 374 raise TypeError('header value must be string: %r' % (v,))
251 375
252 376 if isinstance(status, ErrorResponse):
253 377 self.headers.extend(status.headers)
254 378 if status.code == HTTP_NOT_MODIFIED:
255 379 # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
256 380 # it MUST NOT include any headers other than these and no
257 381 # body
258 382 self.headers = [(k, v) for (k, v) in self.headers if
259 383 k in ('Date', 'ETag', 'Expires',
260 384 'Cache-Control', 'Vary')]
261 385 status = statusmessage(status.code, pycompat.bytestr(status))
262 386 elif status == 200:
263 387 status = '200 Script output follows'
264 388 elif isinstance(status, int):
265 389 status = statusmessage(status)
266 390
267 391 # Various HTTP clients (notably httplib) won't read the HTTP
268 392 # response until the HTTP request has been sent in full. If servers
269 393 # (us) send a response before the HTTP request has been fully sent,
270 394 # the connection may deadlock because neither end is reading.
271 395 #
272 396 # We work around this by "draining" the request data before
273 397 # sending any response in some conditions.
274 398 drain = False
275 399 close = False
276 400
277 401 # If the client sent Expect: 100-continue, we assume it is smart
278 402 # enough to deal with the server sending a response before reading
279 403 # the request. (httplib doesn't do this.)
280 404 if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
281 405 pass
282 406 # Only tend to request methods that have bodies. Strictly speaking,
283 407 # we should sniff for a body. But this is fine for our existing
284 408 # WSGI applications.
285 409 elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
286 410 pass
287 411 else:
288 412 # If we don't know how much data to read, there's no guarantee
289 413 # that we can drain the request responsibly. The WSGI
290 414 # specification only says that servers *should* ensure the
291 415 # input stream doesn't overrun the actual request. So there's
292 416 # no guarantee that reading until EOF won't corrupt the stream
293 417 # state.
294 418 if not isinstance(self.req.bodyfh, util.cappedreader):
295 419 close = True
296 420 else:
297 421 # We /could/ only drain certain HTTP response codes. But 200
298 422 # and non-200 wire protocol responses both require draining.
299 423 # Since we have a capped reader in place for all situations
300 424 # where we drain, it is safe to read from that stream. We'll
301 425 # either do a drain or no-op if we're already at EOF.
302 426 drain = True
303 427
304 428 if close:
305 429 self.headers.append((r'Connection', r'Close'))
306 430
307 431 if drain:
308 432 assert isinstance(self.req.bodyfh, util.cappedreader)
309 433 while True:
310 434 chunk = self.req.bodyfh.read(32768)
311 435 if not chunk:
312 436 break
313 437
314 438 self.server_write = self._start_response(
315 439 pycompat.sysstr(status), self.headers)
316 440 self._start_response = None
317 441 self.headers = []
318 442 if body is not None:
319 443 self.write(body)
320 444 self.server_write = None
321 445
322 446 def write(self, thing):
323 447 if thing:
324 448 try:
325 449 self.server_write(thing)
326 450 except socket.error as inst:
327 451 if inst[0] != errno.ECONNRESET:
328 452 raise
329 453
330 454 def flush(self):
331 455 return None
332 456
333 457 def wsgiapplication(app_maker):
334 458 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
335 459 can and should now be used as a WSGI application.'''
336 460 application = app_maker()
337 461 def run_wsgi(env, respond):
338 462 return application(env, respond)
339 463 return run_wsgi
@@ -1,649 +1,655
1 1 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
2 2 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 3 #
4 4 # This software may be used and distributed according to the terms of the
5 5 # GNU General Public License version 2 or any later version.
6 6
7 7 from __future__ import absolute_import
8 8
9 9 import contextlib
10 10 import struct
11 11 import sys
12 12 import threading
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 encoding,
17 17 error,
18 18 hook,
19 19 pycompat,
20 20 util,
21 21 wireproto,
22 22 wireprototypes,
23 23 )
24 24
25 25 stringio = util.stringio
26 26
27 27 urlerr = util.urlerr
28 28 urlreq = util.urlreq
29 29
30 30 HTTP_OK = 200
31 31
32 32 HGTYPE = 'application/mercurial-0.1'
33 33 HGTYPE2 = 'application/mercurial-0.2'
34 34 HGERRTYPE = 'application/hg-error'
35 35
36 36 SSHV1 = wireprototypes.SSHV1
37 37 SSHV2 = wireprototypes.SSHV2
38 38
39 39 def decodevaluefromheaders(req, headerprefix):
40 40 """Decode a long value from multiple HTTP request headers.
41 41
42 42 Returns the value as a bytes, not a str.
43 43 """
44 44 chunks = []
45 45 i = 1
46 46 while True:
47 47 v = req.headers.get(b'%s-%d' % (headerprefix, i))
48 48 if v is None:
49 49 break
50 50 chunks.append(pycompat.bytesurl(v))
51 51 i += 1
52 52
53 53 return ''.join(chunks)
54 54
55 55 class httpv1protocolhandler(wireprototypes.baseprotocolhandler):
56 56 def __init__(self, wsgireq, req, ui, checkperm):
57 57 self._wsgireq = wsgireq
58 58 self._req = req
59 59 self._ui = ui
60 60 self._checkperm = checkperm
61 61
62 62 @property
63 63 def name(self):
64 64 return 'http-v1'
65 65
66 66 def getargs(self, args):
67 67 knownargs = self._args()
68 68 data = {}
69 69 keys = args.split()
70 70 for k in keys:
71 71 if k == '*':
72 72 star = {}
73 73 for key in knownargs.keys():
74 74 if key != 'cmd' and key not in keys:
75 75 star[key] = knownargs[key][0]
76 76 data['*'] = star
77 77 else:
78 78 data[k] = knownargs[k][0]
79 79 return [data[k] for k in keys]
80 80
81 81 def _args(self):
82 82 args = util.rapply(pycompat.bytesurl, self._wsgireq.form.copy())
83 83 postlen = int(self._req.headers.get(b'X-HgArgs-Post', 0))
84 84 if postlen:
85 85 args.update(urlreq.parseqs(
86 86 self._req.bodyfh.read(postlen), keep_blank_values=True))
87 87 return args
88 88
89 89 argvalue = decodevaluefromheaders(self._req, b'X-HgArg')
90 90 args.update(urlreq.parseqs(argvalue, keep_blank_values=True))
91 91 return args
92 92
93 93 def forwardpayload(self, fp):
94 94 # Existing clients *always* send Content-Length.
95 95 length = int(self._req.headers[b'Content-Length'])
96 96
97 97 # If httppostargs is used, we need to read Content-Length
98 98 # minus the amount that was consumed by args.
99 99 length -= int(self._req.headers.get(b'X-HgArgs-Post', 0))
100 100 for s in util.filechunkiter(self._req.bodyfh, limit=length):
101 101 fp.write(s)
102 102
103 103 @contextlib.contextmanager
104 104 def mayberedirectstdio(self):
105 105 oldout = self._ui.fout
106 106 olderr = self._ui.ferr
107 107
108 108 out = util.stringio()
109 109
110 110 try:
111 111 self._ui.fout = out
112 112 self._ui.ferr = out
113 113 yield out
114 114 finally:
115 115 self._ui.fout = oldout
116 116 self._ui.ferr = olderr
117 117
118 118 def client(self):
119 119 return 'remote:%s:%s:%s' % (
120 120 self._wsgireq.env.get('wsgi.url_scheme') or 'http',
121 121 urlreq.quote(self._wsgireq.env.get('REMOTE_HOST', '')),
122 122 urlreq.quote(self._wsgireq.env.get('REMOTE_USER', '')))
123 123
124 124 def addcapabilities(self, repo, caps):
125 125 caps.append('httpheader=%d' %
126 126 repo.ui.configint('server', 'maxhttpheaderlen'))
127 127 if repo.ui.configbool('experimental', 'httppostargs'):
128 128 caps.append('httppostargs')
129 129
130 130 # FUTURE advertise 0.2rx once support is implemented
131 131 # FUTURE advertise minrx and mintx after consulting config option
132 132 caps.append('httpmediatype=0.1rx,0.1tx,0.2tx')
133 133
134 134 compengines = wireproto.supportedcompengines(repo.ui, util.SERVERROLE)
135 135 if compengines:
136 136 comptypes = ','.join(urlreq.quote(e.wireprotosupport().name)
137 137 for e in compengines)
138 138 caps.append('compression=%s' % comptypes)
139 139
140 140 return caps
141 141
142 142 def checkperm(self, perm):
143 143 return self._checkperm(perm)
144 144
145 145 # This method exists mostly so that extensions like remotefilelog can
146 146 # disable a kludgey legacy method only over http. As of early 2018,
147 147 # there are no other known users, so with any luck we can discard this
148 148 # hook if remotefilelog becomes a first-party extension.
149 149 def iscmd(cmd):
150 150 return cmd in wireproto.commands
151 151
152 def handlewsgirequest(rctx, wsgireq, req, checkperm):
152 def handlewsgirequest(rctx, wsgireq, req, res, checkperm):
153 153 """Possibly process a wire protocol request.
154 154
155 155 If the current request is a wire protocol request, the request is
156 156 processed by this function.
157 157
158 158 ``wsgireq`` is a ``wsgirequest`` instance.
159 159 ``req`` is a ``parsedrequest`` instance.
160 ``res`` is a ``wsgiresponse`` instance.
160 161
161 Returns a 2-tuple of (bool, response) where the 1st element indicates
162 whether the request was handled and the 2nd element is a return
163 value for a WSGI application (often a generator of bytes).
162 Returns a bool indicating if the request was serviced. If set, the caller
163 should stop processing the request, as a response has already been issued.
164 164 """
165 165 # Avoid cycle involving hg module.
166 166 from .hgweb import common as hgwebcommon
167 167
168 168 repo = rctx.repo
169 169
170 170 # HTTP version 1 wire protocol requests are denoted by a "cmd" query
171 171 # string parameter. If it isn't present, this isn't a wire protocol
172 172 # request.
173 173 if 'cmd' not in req.querystringdict:
174 return False, None
174 return False
175 175
176 176 cmd = req.querystringdict['cmd'][0]
177 177
178 178 # The "cmd" request parameter is used by both the wire protocol and hgweb.
179 179 # While not all wire protocol commands are available for all transports,
180 180 # if we see a "cmd" value that resembles a known wire protocol command, we
181 181 # route it to a protocol handler. This is better than routing possible
182 182 # wire protocol requests to hgweb because it prevents hgweb from using
183 183 # known wire protocol commands and it is less confusing for machine
184 184 # clients.
185 185 if not iscmd(cmd):
186 return False, None
186 return False
187 187
188 188 # The "cmd" query string argument is only valid on the root path of the
189 189 # repo. e.g. ``/?cmd=foo``, ``/repo?cmd=foo``. URL paths within the repo
190 190 # like ``/blah?cmd=foo`` are not allowed. So don't recognize the request
191 191 # in this case. We send an HTTP 404 for backwards compatibility reasons.
192 192 if req.dispatchpath:
193 res = _handlehttperror(
194 hgwebcommon.ErrorResponse(hgwebcommon.HTTP_NOT_FOUND), wsgireq,
195 req)
196
197 return True, res
193 res.status = hgwebcommon.statusmessage(404)
194 res.headers['Content-Type'] = HGTYPE
195 # TODO This is not a good response to issue for this request. This
196 # is mostly for BC for now.
197 res.setbodybytes('0\n%s\n' % b'Not Found')
198 return True
198 199
199 200 proto = httpv1protocolhandler(wsgireq, req, repo.ui,
200 201 lambda perm: checkperm(rctx, wsgireq, perm))
201 202
202 203 # The permissions checker should be the only thing that can raise an
203 204 # ErrorResponse. It is kind of a layer violation to catch an hgweb
204 205 # exception here. So consider refactoring into a exception type that
205 206 # is associated with the wire protocol.
206 207 try:
207 res = _callhttp(repo, wsgireq, req, proto, cmd)
208 _callhttp(repo, wsgireq, req, res, proto, cmd)
208 209 except hgwebcommon.ErrorResponse as e:
209 res = _handlehttperror(e, wsgireq, req)
210 for k, v in e.headers:
211 res.headers[k] = v
212 res.status = hgwebcommon.statusmessage(e.code, pycompat.bytestr(e))
213 # TODO This response body assumes the failed command was
214 # "unbundle." That assumption is not always valid.
215 res.setbodybytes('0\n%s\n' % pycompat.bytestr(e))
210 216
211 return True, res
217 return True
212 218
213 219 def _httpresponsetype(ui, req, prefer_uncompressed):
214 220 """Determine the appropriate response type and compression settings.
215 221
216 222 Returns a tuple of (mediatype, compengine, engineopts).
217 223 """
218 224 # Determine the response media type and compression engine based
219 225 # on the request parameters.
220 226 protocaps = decodevaluefromheaders(req, 'X-HgProto').split(' ')
221 227
222 228 if '0.2' in protocaps:
223 229 # All clients are expected to support uncompressed data.
224 230 if prefer_uncompressed:
225 231 return HGTYPE2, util._noopengine(), {}
226 232
227 233 # Default as defined by wire protocol spec.
228 234 compformats = ['zlib', 'none']
229 235 for cap in protocaps:
230 236 if cap.startswith('comp='):
231 237 compformats = cap[5:].split(',')
232 238 break
233 239
234 240 # Now find an agreed upon compression format.
235 241 for engine in wireproto.supportedcompengines(ui, util.SERVERROLE):
236 242 if engine.wireprotosupport().name in compformats:
237 243 opts = {}
238 244 level = ui.configint('server', '%slevel' % engine.name())
239 245 if level is not None:
240 246 opts['level'] = level
241 247
242 248 return HGTYPE2, engine, opts
243 249
244 250 # No mutually supported compression format. Fall back to the
245 251 # legacy protocol.
246 252
247 253 # Don't allow untrusted settings because disabling compression or
248 254 # setting a very high compression level could lead to flooding
249 255 # the server's network or CPU.
250 256 opts = {'level': ui.configint('server', 'zliblevel')}
251 257 return HGTYPE, util.compengines['zlib'], opts
252 258
253 def _callhttp(repo, wsgireq, req, proto, cmd):
259 def _callhttp(repo, wsgireq, req, res, proto, cmd):
260 # Avoid cycle involving hg module.
261 from .hgweb import common as hgwebcommon
262
254 263 def genversion2(gen, engine, engineopts):
255 264 # application/mercurial-0.2 always sends a payload header
256 265 # identifying the compression engine.
257 266 name = engine.wireprotosupport().name
258 267 assert 0 < len(name) < 256
259 268 yield struct.pack('B', len(name))
260 269 yield name
261 270
262 271 for chunk in gen:
263 272 yield chunk
264 273
274 def setresponse(code, contenttype, bodybytes=None, bodygen=None):
275 if code == HTTP_OK:
276 res.status = '200 Script output follows'
277 else:
278 res.status = hgwebcommon.statusmessage(code)
279
280 res.headers['Content-Type'] = contenttype
281
282 if bodybytes is not None:
283 res.setbodybytes(bodybytes)
284 if bodygen is not None:
285 res.setbodygen(bodygen)
286
265 287 if not wireproto.commands.commandavailable(cmd, proto):
266 wsgireq.respond(HTTP_OK, HGERRTYPE,
267 body=_('requested wire protocol command is not '
268 'available over HTTP'))
269 return []
288 setresponse(HTTP_OK, HGERRTYPE,
289 _('requested wire protocol command is not available over '
290 'HTTP'))
291 return
270 292
271 293 proto.checkperm(wireproto.commands[cmd].permission)
272 294
273 295 rsp = wireproto.dispatch(repo, proto, cmd)
274 296
275 297 if isinstance(rsp, bytes):
276 wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
277 return []
298 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
278 299 elif isinstance(rsp, wireprototypes.bytesresponse):
279 wsgireq.respond(HTTP_OK, HGTYPE, body=rsp.data)
280 return []
300 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp.data)
281 301 elif isinstance(rsp, wireprototypes.streamreslegacy):
282 gen = rsp.gen
283 wsgireq.respond(HTTP_OK, HGTYPE)
284 return gen
302 setresponse(HTTP_OK, HGTYPE, bodygen=rsp.gen)
285 303 elif isinstance(rsp, wireprototypes.streamres):
286 304 gen = rsp.gen
287 305
288 306 # This code for compression should not be streamres specific. It
289 307 # is here because we only compress streamres at the moment.
290 308 mediatype, engine, engineopts = _httpresponsetype(
291 309 repo.ui, req, rsp.prefer_uncompressed)
292 310 gen = engine.compressstream(gen, engineopts)
293 311
294 312 if mediatype == HGTYPE2:
295 313 gen = genversion2(gen, engine, engineopts)
296 314
297 wsgireq.respond(HTTP_OK, mediatype)
298 return gen
315 setresponse(HTTP_OK, mediatype, bodygen=gen)
299 316 elif isinstance(rsp, wireprototypes.pushres):
300 317 rsp = '%d\n%s' % (rsp.res, rsp.output)
301 wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
302 return []
318 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
303 319 elif isinstance(rsp, wireprototypes.pusherr):
304 320 rsp = '0\n%s\n' % rsp.res
305 wsgireq.respond(HTTP_OK, HGTYPE, body=rsp)
306 return []
321 res.drain = True
322 setresponse(HTTP_OK, HGTYPE, bodybytes=rsp)
307 323 elif isinstance(rsp, wireprototypes.ooberror):
308 rsp = rsp.message
309 wsgireq.respond(HTTP_OK, HGERRTYPE, body=rsp)
310 return []
311 raise error.ProgrammingError('hgweb.protocol internal failure', rsp)
312
313 def _handlehttperror(e, wsgireq, req):
314 """Called when an ErrorResponse is raised during HTTP request processing."""
315
316 # TODO This response body assumes the failed command was
317 # "unbundle." That assumption is not always valid.
318 wsgireq.respond(e, HGTYPE, body='0\n%s\n' % pycompat.bytestr(e))
319
320 return ''
324 setresponse(HTTP_OK, HGERRTYPE, bodybytes=rsp.message)
325 else:
326 raise error.ProgrammingError('hgweb.protocol internal failure', rsp)
321 327
322 328 def _sshv1respondbytes(fout, value):
323 329 """Send a bytes response for protocol version 1."""
324 330 fout.write('%d\n' % len(value))
325 331 fout.write(value)
326 332 fout.flush()
327 333
328 334 def _sshv1respondstream(fout, source):
329 335 write = fout.write
330 336 for chunk in source.gen:
331 337 write(chunk)
332 338 fout.flush()
333 339
334 340 def _sshv1respondooberror(fout, ferr, rsp):
335 341 ferr.write(b'%s\n-\n' % rsp)
336 342 ferr.flush()
337 343 fout.write(b'\n')
338 344 fout.flush()
339 345
340 346 class sshv1protocolhandler(wireprototypes.baseprotocolhandler):
341 347 """Handler for requests services via version 1 of SSH protocol."""
342 348 def __init__(self, ui, fin, fout):
343 349 self._ui = ui
344 350 self._fin = fin
345 351 self._fout = fout
346 352
347 353 @property
348 354 def name(self):
349 355 return wireprototypes.SSHV1
350 356
351 357 def getargs(self, args):
352 358 data = {}
353 359 keys = args.split()
354 360 for n in xrange(len(keys)):
355 361 argline = self._fin.readline()[:-1]
356 362 arg, l = argline.split()
357 363 if arg not in keys:
358 364 raise error.Abort(_("unexpected parameter %r") % arg)
359 365 if arg == '*':
360 366 star = {}
361 367 for k in xrange(int(l)):
362 368 argline = self._fin.readline()[:-1]
363 369 arg, l = argline.split()
364 370 val = self._fin.read(int(l))
365 371 star[arg] = val
366 372 data['*'] = star
367 373 else:
368 374 val = self._fin.read(int(l))
369 375 data[arg] = val
370 376 return [data[k] for k in keys]
371 377
372 378 def forwardpayload(self, fpout):
373 379 # We initially send an empty response. This tells the client it is
374 380 # OK to start sending data. If a client sees any other response, it
375 381 # interprets it as an error.
376 382 _sshv1respondbytes(self._fout, b'')
377 383
378 384 # The file is in the form:
379 385 #
380 386 # <chunk size>\n<chunk>
381 387 # ...
382 388 # 0\n
383 389 count = int(self._fin.readline())
384 390 while count:
385 391 fpout.write(self._fin.read(count))
386 392 count = int(self._fin.readline())
387 393
388 394 @contextlib.contextmanager
389 395 def mayberedirectstdio(self):
390 396 yield None
391 397
392 398 def client(self):
393 399 client = encoding.environ.get('SSH_CLIENT', '').split(' ', 1)[0]
394 400 return 'remote:ssh:' + client
395 401
396 402 def addcapabilities(self, repo, caps):
397 403 return caps
398 404
399 405 def checkperm(self, perm):
400 406 pass
401 407
402 408 class sshv2protocolhandler(sshv1protocolhandler):
403 409 """Protocol handler for version 2 of the SSH protocol."""
404 410
405 411 @property
406 412 def name(self):
407 413 return wireprototypes.SSHV2
408 414
409 415 def _runsshserver(ui, repo, fin, fout, ev):
410 416 # This function operates like a state machine of sorts. The following
411 417 # states are defined:
412 418 #
413 419 # protov1-serving
414 420 # Server is in protocol version 1 serving mode. Commands arrive on
415 421 # new lines. These commands are processed in this state, one command
416 422 # after the other.
417 423 #
418 424 # protov2-serving
419 425 # Server is in protocol version 2 serving mode.
420 426 #
421 427 # upgrade-initial
422 428 # The server is going to process an upgrade request.
423 429 #
424 430 # upgrade-v2-filter-legacy-handshake
425 431 # The protocol is being upgraded to version 2. The server is expecting
426 432 # the legacy handshake from version 1.
427 433 #
428 434 # upgrade-v2-finish
429 435 # The upgrade to version 2 of the protocol is imminent.
430 436 #
431 437 # shutdown
432 438 # The server is shutting down, possibly in reaction to a client event.
433 439 #
434 440 # And here are their transitions:
435 441 #
436 442 # protov1-serving -> shutdown
437 443 # When server receives an empty request or encounters another
438 444 # error.
439 445 #
440 446 # protov1-serving -> upgrade-initial
441 447 # An upgrade request line was seen.
442 448 #
443 449 # upgrade-initial -> upgrade-v2-filter-legacy-handshake
444 450 # Upgrade to version 2 in progress. Server is expecting to
445 451 # process a legacy handshake.
446 452 #
447 453 # upgrade-v2-filter-legacy-handshake -> shutdown
448 454 # Client did not fulfill upgrade handshake requirements.
449 455 #
450 456 # upgrade-v2-filter-legacy-handshake -> upgrade-v2-finish
451 457 # Client fulfilled version 2 upgrade requirements. Finishing that
452 458 # upgrade.
453 459 #
454 460 # upgrade-v2-finish -> protov2-serving
455 461 # Protocol upgrade to version 2 complete. Server can now speak protocol
456 462 # version 2.
457 463 #
458 464 # protov2-serving -> protov1-serving
459 465 # Ths happens by default since protocol version 2 is the same as
460 466 # version 1 except for the handshake.
461 467
462 468 state = 'protov1-serving'
463 469 proto = sshv1protocolhandler(ui, fin, fout)
464 470 protoswitched = False
465 471
466 472 while not ev.is_set():
467 473 if state == 'protov1-serving':
468 474 # Commands are issued on new lines.
469 475 request = fin.readline()[:-1]
470 476
471 477 # Empty lines signal to terminate the connection.
472 478 if not request:
473 479 state = 'shutdown'
474 480 continue
475 481
476 482 # It looks like a protocol upgrade request. Transition state to
477 483 # handle it.
478 484 if request.startswith(b'upgrade '):
479 485 if protoswitched:
480 486 _sshv1respondooberror(fout, ui.ferr,
481 487 b'cannot upgrade protocols multiple '
482 488 b'times')
483 489 state = 'shutdown'
484 490 continue
485 491
486 492 state = 'upgrade-initial'
487 493 continue
488 494
489 495 available = wireproto.commands.commandavailable(request, proto)
490 496
491 497 # This command isn't available. Send an empty response and go
492 498 # back to waiting for a new command.
493 499 if not available:
494 500 _sshv1respondbytes(fout, b'')
495 501 continue
496 502
497 503 rsp = wireproto.dispatch(repo, proto, request)
498 504
499 505 if isinstance(rsp, bytes):
500 506 _sshv1respondbytes(fout, rsp)
501 507 elif isinstance(rsp, wireprototypes.bytesresponse):
502 508 _sshv1respondbytes(fout, rsp.data)
503 509 elif isinstance(rsp, wireprototypes.streamres):
504 510 _sshv1respondstream(fout, rsp)
505 511 elif isinstance(rsp, wireprototypes.streamreslegacy):
506 512 _sshv1respondstream(fout, rsp)
507 513 elif isinstance(rsp, wireprototypes.pushres):
508 514 _sshv1respondbytes(fout, b'')
509 515 _sshv1respondbytes(fout, b'%d' % rsp.res)
510 516 elif isinstance(rsp, wireprototypes.pusherr):
511 517 _sshv1respondbytes(fout, rsp.res)
512 518 elif isinstance(rsp, wireprototypes.ooberror):
513 519 _sshv1respondooberror(fout, ui.ferr, rsp.message)
514 520 else:
515 521 raise error.ProgrammingError('unhandled response type from '
516 522 'wire protocol command: %s' % rsp)
517 523
518 524 # For now, protocol version 2 serving just goes back to version 1.
519 525 elif state == 'protov2-serving':
520 526 state = 'protov1-serving'
521 527 continue
522 528
523 529 elif state == 'upgrade-initial':
524 530 # We should never transition into this state if we've switched
525 531 # protocols.
526 532 assert not protoswitched
527 533 assert proto.name == wireprototypes.SSHV1
528 534
529 535 # Expected: upgrade <token> <capabilities>
530 536 # If we get something else, the request is malformed. It could be
531 537 # from a future client that has altered the upgrade line content.
532 538 # We treat this as an unknown command.
533 539 try:
534 540 token, caps = request.split(b' ')[1:]
535 541 except ValueError:
536 542 _sshv1respondbytes(fout, b'')
537 543 state = 'protov1-serving'
538 544 continue
539 545
540 546 # Send empty response if we don't support upgrading protocols.
541 547 if not ui.configbool('experimental', 'sshserver.support-v2'):
542 548 _sshv1respondbytes(fout, b'')
543 549 state = 'protov1-serving'
544 550 continue
545 551
546 552 try:
547 553 caps = urlreq.parseqs(caps)
548 554 except ValueError:
549 555 _sshv1respondbytes(fout, b'')
550 556 state = 'protov1-serving'
551 557 continue
552 558
553 559 # We don't see an upgrade request to protocol version 2. Ignore
554 560 # the upgrade request.
555 561 wantedprotos = caps.get(b'proto', [b''])[0]
556 562 if SSHV2 not in wantedprotos:
557 563 _sshv1respondbytes(fout, b'')
558 564 state = 'protov1-serving'
559 565 continue
560 566
561 567 # It looks like we can honor this upgrade request to protocol 2.
562 568 # Filter the rest of the handshake protocol request lines.
563 569 state = 'upgrade-v2-filter-legacy-handshake'
564 570 continue
565 571
566 572 elif state == 'upgrade-v2-filter-legacy-handshake':
567 573 # Client should have sent legacy handshake after an ``upgrade``
568 574 # request. Expected lines:
569 575 #
570 576 # hello
571 577 # between
572 578 # pairs 81
573 579 # 0000...-0000...
574 580
575 581 ok = True
576 582 for line in (b'hello', b'between', b'pairs 81'):
577 583 request = fin.readline()[:-1]
578 584
579 585 if request != line:
580 586 _sshv1respondooberror(fout, ui.ferr,
581 587 b'malformed handshake protocol: '
582 588 b'missing %s' % line)
583 589 ok = False
584 590 state = 'shutdown'
585 591 break
586 592
587 593 if not ok:
588 594 continue
589 595
590 596 request = fin.read(81)
591 597 if request != b'%s-%s' % (b'0' * 40, b'0' * 40):
592 598 _sshv1respondooberror(fout, ui.ferr,
593 599 b'malformed handshake protocol: '
594 600 b'missing between argument value')
595 601 state = 'shutdown'
596 602 continue
597 603
598 604 state = 'upgrade-v2-finish'
599 605 continue
600 606
601 607 elif state == 'upgrade-v2-finish':
602 608 # Send the upgrade response.
603 609 fout.write(b'upgraded %s %s\n' % (token, SSHV2))
604 610 servercaps = wireproto.capabilities(repo, proto)
605 611 rsp = b'capabilities: %s' % servercaps.data
606 612 fout.write(b'%d\n%s\n' % (len(rsp), rsp))
607 613 fout.flush()
608 614
609 615 proto = sshv2protocolhandler(ui, fin, fout)
610 616 protoswitched = True
611 617
612 618 state = 'protov2-serving'
613 619 continue
614 620
615 621 elif state == 'shutdown':
616 622 break
617 623
618 624 else:
619 625 raise error.ProgrammingError('unhandled ssh server state: %s' %
620 626 state)
621 627
622 628 class sshserver(object):
623 629 def __init__(self, ui, repo, logfh=None):
624 630 self._ui = ui
625 631 self._repo = repo
626 632 self._fin = ui.fin
627 633 self._fout = ui.fout
628 634
629 635 # Log write I/O to stdout and stderr if configured.
630 636 if logfh:
631 637 self._fout = util.makeloggingfileobject(
632 638 logfh, self._fout, 'o', logdata=True)
633 639 ui.ferr = util.makeloggingfileobject(
634 640 logfh, ui.ferr, 'e', logdata=True)
635 641
636 642 hook.redirect(True)
637 643 ui.fout = repo.ui.fout = ui.ferr
638 644
639 645 # Prevent insertion/deletion of CRs
640 646 util.setbinary(self._fin)
641 647 util.setbinary(self._fout)
642 648
643 649 def serve_forever(self):
644 650 self.serveuntil(threading.Event())
645 651 sys.exit(0)
646 652
647 653 def serveuntil(self, ev):
648 654 """Serve until a threading.Event is set."""
649 655 _runsshserver(self._ui, self._repo, self._fin, self._fout, ev)
General Comments 0
You need to be logged in to leave comments. Login now