upstream/mercurial-mirror Commit - r36873:da4e2f87

hgweb: expose input stream on parsed WSGI request object...

Gregory Szorc -

r36873:da4e2f87 default

parent child

mercurial/hgweb/hgwebdir_mod.py

0 +2 -1

              # hgweb/hgwebdir_mod.py - Web interface for a directory of repositories.
              #
              # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
              # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from __future__ import absolute_import
              import os
              import re
              import time
              from ..i18n import _
              from .common import (
                  ErrorResponse,
                  HTTP_NOT_FOUND,
                  HTTP_OK,
                  HTTP_SERVER_ERROR,
                  cspvalues,
                  get_contact,
                  get_mtime,
                  ismember,
                  paritygen,
                  staticfile,
              )
              from .. import (
                  configitems,
                  encoding,
                  error,
                  hg,
                  profiling,
                  pycompat,
                  scmutil,
                  templater,
                  ui as uimod,
                  util,
              )
              from . import (
                  hgweb_mod,
                  request as requestmod,
                  webutil,
                  wsgicgi,
              )
              from ..utils import dateutil
              def cleannames(items):
                  """Return ``items`` with every name normalized.

                  ``items`` is an iterable of (name, path) pairs. Each name is run
                  through util.pconvert() and has leading/trailing '/' removed; the
                  path half of each pair is passed through unchanged.
                  """
                  cleaned = []
                  for name, path in items:
                      cleaned.append((util.pconvert(name).strip('/'), path))
                  return cleaned
              def findrepos(paths):
                  """Expand (prefix, root) pairs into a list of (url name, path) pairs.

                  A root whose last path component is neither '*' nor '**' is kept
                  as-is. Otherwise the directory above the wildcard is walked with
                  scmutil.walkrepos() and every repository found is mapped to a URL
                  name under ``prefix`` by urlrepos().
                  """
                  # Wildcard tail -> value passed as ``recurse`` to walkrepos().
                  wildcards = {'*': False, '**': True}
                  found = []
                  for prefix, root in cleannames(paths):
                      roothead, roottail = os.path.split(root)
                      # "foo = /bar/*" or "foo = /bar/**" lets every repo /bar/N in or
                      # below /bar/ be served as foo/N .
                      # '*' will not search inside dirs with .hg (except .hg/patches),
                      # '**' will search inside dirs with .hg (and thus also find
                      # subrepos).
                      if roottail not in wildcards:
                          found.append((prefix, root))
                          continue
                      recurse = wildcards[roottail]
                      roothead = os.path.normpath(os.path.abspath(roothead))
                      walked = scmutil.walkrepos(roothead, followsym=True,
                                                 recurse=recurse)
                      found.extend(urlrepos(prefix, roothead, walked))
                  return found
              def urlrepos(prefix, roothead, paths):
                  """yield url paths and filesystem paths from a list of repo paths

                  Each yielded item is a (url path, normalized filesystem path) pair.
                  The url path is ``prefix`` joined with the part of the repo path
                  that follows ``roothead``, with surrounding '/' removed.

                  >>> conv = lambda seq: [(v, util.pconvert(p)) for v,p in seq]
                  >>> conv(urlrepos(b'hg', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
                  [('hg/r', '/opt/r'), ('hg/r/r', '/opt/r/r'), ('hg', '/opt')]
                  >>> conv(urlrepos(b'', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
                  [('r', '/opt/r'), ('r/r', '/opt/r/r'), ('', '/opt')]
                  """
                  striplen = len(roothead)
                  for repopath in paths:
                      repopath = os.path.normpath(repopath)
                      relative = util.pconvert(repopath[striplen:]).lstrip('/')
                      urlpath = (prefix + '/' + relative).strip('/')
                      yield urlpath, repopath
              def geturlcgivars(baseurl, port):
                  """
                  Extract CGI variables from baseurl

                  Returns a (server name, port, script path) tuple. A port present in
                  ``baseurl`` wins over the ``port`` argument, a missing host becomes
                  '', and the path always starts with '/'.

                  >>> geturlcgivars(b"http://host.org/base", b"80")
                  ('host.org', '80', '/base')
                  >>> geturlcgivars(b"http://host.org:8000/base", b"80")
                  ('host.org', '8000', '/base')
                  >>> geturlcgivars(b'/base', 8000)
                  ('', '8000', '/base')
                  >>> geturlcgivars(b"base", b'8000')
                  ('', '8000', '/base')
                  >>> geturlcgivars(b"http://host", b'8000')
                  ('host', '8000', '/')
                  >>> geturlcgivars(b"http://host/", b'8000')
                  ('host', '8000', '/')
                  """
                  parsed = util.url(baseurl)
                  host = parsed.host or ''
                  # A port embedded in the URL overrides the caller-supplied one.
                  effectiveport = parsed.port or port
                  scriptpath = parsed.path or ""
                  if not scriptpath.startswith('/'):
                      scriptpath = '/' + scriptpath
                  return host, pycompat.bytestr(effectiveport), scriptpath
              class hgwebdir(object):
                  """HTTP server for multiple repositories.
                  Given a configuration, different repositories will be served depending
                  on the request path.
                  Instances are typically used as WSGI applications.
                  """
                  def __init__(self, conf, baseui=None):
                      """Remember the configuration source and run the first refresh().

                      ``conf`` may be a dict, a list/tuple of (name, path) pairs, or
                      anything else, which refresh() treats as a config file path.
                      ``baseui``, when given, is copied by refresh() instead of loading
                      a new ui.
                      """
                      self.conf, self.baseui = conf, baseui
                      # Filled in by refresh(); a lastrefresh of 0 marks "never refreshed".
                      self.ui = None
                      self.motd = None
                      self.lastrefresh = 0
                      self.refresh()
                  def refresh(self):
                      """Rebuild the ui object and repository list when they are stale.

                      Nothing happens if web.refreshinterval is positive and fewer than
                      that many seconds have elapsed since the last refresh. Otherwise a
                      fresh ui is obtained (a copy of ``baseui`` when one was supplied),
                      the repository paths are collected from ``self.conf`` and the
                      ``collections`` config section, and the cached web settings
                      (style, templates, stripes, baseurl, prefix) are re-read. The
                      module-global encoding.encoding is overwritten as well.

                      Raises error.Abort when ``self.conf`` is treated as a config file
                      path and that file does not exist.
                      """
                      if self.ui:
                          interval = self.ui.configint('web', 'refreshinterval')
                      else:
                          # No ui yet: use the registered default for the option.
                          defaultitem = configitems.coreitems['web']['refreshinterval']
                          interval = defaultitem.default
                      # refreshinterval <= 0 means to always refresh.
                      if interval > 0:
                          if self.lastrefresh + interval > time.time():
                              return
                      if self.baseui:
                          newui = self.baseui.copy()
                      else:
                          newui = uimod.ui.load()
                          newui.setconfig('ui', 'report_untrusted', 'off', 'hgwebdir')
                          newui.setconfig('ui', 'nontty', 'true', 'hgwebdir')
                          # displaying bundling progress bar while serving feels wrong
                          # and may break some wsgi implementations.
                          newui.setconfig('progress', 'disable', 'true', 'hgweb')
                      conf = self.conf
                      if isinstance(conf, dict):
                          paths = conf.items()
                      elif isinstance(conf, (list, tuple)):
                          paths = conf
                      else:
                          # Anything else is taken to be the path of a config file whose
                          # [paths] section is read under the name 'hgweb-paths'.
                          if not os.path.exists(conf):
                              raise error.Abort(_('config file %s not found!') % conf)
                          newui.readconfig(conf, remap={'paths': 'hgweb-paths'},
                                           trust=True)
                          paths = []
                          for name, ignored in newui.configitems('hgweb-paths'):
                              paths.extend(
                                  (name, path)
                                  for path in newui.configlist('hgweb-paths', name))
                      repolist = findrepos(paths)
                      for collprefix, collroot in newui.configitems('collections'):
                          collprefix = util.pconvert(collprefix)
                          for walked in scmutil.walkrepos(collroot, followsym=True):
                              repopath = os.path.normpath(walked)
                              reponame = util.pconvert(repopath)
                              if reponame.startswith(collprefix):
                                  reponame = reponame[len(collprefix):]
                              repolist.append((reponame.lstrip('/'), repopath))
                      self.repos = repolist
                      self.ui = newui
                      encoding.encoding = newui.config('web', 'encoding')
                      self.style = newui.config('web', 'style')
                      self.templatepath = newui.config('web', 'templates',
                                                       untrusted=False)
                      stripes = newui.config('web', 'stripes')
                      self.stripecount = stripes
                      if stripes:
                          self.stripecount = int(stripes)
                      self._baseurl = newui.config('web', 'baseurl')
                      # Drop at most one leading and one trailing '/' from the prefix.
                      webprefix = newui.config('web', 'prefix')
                      if webprefix.startswith('/'):
                          webprefix = webprefix[1:]
                      if webprefix.endswith('/'):
                          webprefix = webprefix[:-1]
                      self.prefix = webprefix
                      self.lastrefresh = time.time()
                  def run(self):
                      """Hand this application to wsgicgi.launch().

                      Raises RuntimeError unless the GATEWAY_INTERFACE environment
                      variable starts with "CGI/1.".
                      """
                      gateway = encoding.environ.get('GATEWAY_INTERFACE', '')
                      if not gateway.startswith("CGI/1."):
                          raise RuntimeError("This function is only intended to be "
                                             "called while running as a CGI script.")
                      wsgicgi.launch(self)
                  def __call__(self, env, respond):
                      """WSGI entry point: wrap the arguments and delegate to run_wsgi()."""
                      return self.run_wsgi(requestmod.wsgirequest(env, respond))
                  def read_allowed(self, ui, wsgireq):
                      """Decide whether the requesting user may read a repository.

                      The decision is based on the web.deny_read and web.allow_read
                      lists of the repo's ``ui`` and on the REMOTE_USER entry of the
                      request environment:

                      - if deny_read is non-empty, an unauthenticated user or a user
                        matched by deny_read (per ismember()) is refused;
                      - otherwise, if allow_read is empty everyone is accepted;
                      - otherwise only users matched by allow_read are accepted.

                      Returns True when reading is allowed, False when it is not.
                      """
                      user = wsgireq.env.get('REMOTE_USER')
                      denied = ui.configlist('web', 'deny_read', untrusted=True)
                      if denied:
                          if not user or ismember(ui, user, denied):
                              return False
                      allowed = ui.configlist('web', 'allow_read', untrusted=True)
                      # by default, allow reading if no allow_read option has been set
                      if not allowed:
                          return True
                      if ismember(ui, user, allowed):
                          return True
                      return False
                  def run_wsgi(self, wsgireq):
                      """Yield the response chunks of _runwsgi() under the profiler.

                      The profiling context is active for as long as the response is
                      being iterated; it is enabled according to profiling.enabled.
                      """
                      enabled = self.ui.configbool('profiling', 'enabled')
                      profiler = profiling.profile(self.ui, enabled=enabled)
                      with profiler:
                          for chunk in self._runwsgi(wsgireq):
                              yield chunk
                  def _runwsgi(self, wsgireq):
                      """Dispatch one request based on its PATH_INFO.

                      In order, the request is treated as: a static file, the top-level
                      index, a nested ".../index" page, a request for one of the known
                      repositories (delegated to hgweb_mod.hgweb), a subdirectory
                      listing, or "not found". An ErrorResponse raised along the way is
                      rendered with the 'error' template.

                      Returns whatever the chosen handler returns (template output, the
                      hgweb response, or an empty list after staticfile() has run).
                      """
                      try:
                          self.refresh()
                          csp, nonce = cspvalues(self.ui)
                          if csp:
                              wsgireq.headers.append(('Content-Security-Policy', csp))
                          # Request path relative to this application, without
                          # leading/trailing slashes.
                          virtual = wsgireq.env.get("PATH_INFO", "").strip('/')
                          tmpl = self.templater(wsgireq, nonce)
                          ctype = tmpl('mimetype', encoding=encoding.encoding)
                          ctype = templater.stringify(ctype)
                          # a static file
                          if virtual.startswith('static/') or 'static' in wsgireq.form:
                              if virtual.startswith('static/'):
                                  # 7 == len('static/')
                                  fname = virtual[7:]
                              else:
                                  fname = wsgireq.form['static'][0]
                              static = self.ui.config("web", "static", None,
                                                      untrusted=False)
                              if not static:
                                  # No explicit static dir configured: look for a
                                  # 'static' directory under each template path.
                                  tp = self.templatepath or templater.templatepaths()
                                  if isinstance(tp, str):
                                      tp = [tp]
                                  static = [os.path.join(p, 'static') for p in tp]
                              staticfile(static, fname, wsgireq)
                              return []
                          # top-level index
                          repos = dict(self.repos)
                          if (not virtual or virtual == 'index') and virtual not in repos:
                              wsgireq.respond(HTTP_OK, ctype)
                              return self.makeindex(wsgireq, tmpl)
                          # nested indexes and hgwebs
                          if virtual.endswith('/index') and virtual not in repos:
                              # Keeps the trailing '/', e.g. 'a/b/index' -> 'a/b/'.
                              subdir = virtual[:-len('index')]
                              if any(r.startswith(subdir) for r in repos):
                                  wsgireq.respond(HTTP_OK, ctype)
                                  return self.makeindex(wsgireq, tmpl, subdir)
                          def _virtualdirs():
                              # Check the full virtual path, each parent, and the root ('')
                              if virtual != '':
                                  yield virtual
                                  for p in util.finddirs(virtual):
                                      yield p
                              yield ''
                          # The first candidate that names a known repository wins.
                          for virtualrepo in _virtualdirs():
                              real = repos.get(virtualrepo)
                              if real:
                                  wsgireq.env['REPO_NAME'] = virtualrepo
                                  # We have to re-parse because of updated environment
                                  # variable.
                                  # TODO this is kind of hacky and we should have a better
                                  # way of doing this than with REPO_NAME side-effects.
-                                 wsgireq.req = requestmod.parserequestfromenv(wsgireq.env)
+                                 wsgireq.req = requestmod.parserequestfromenv(
+                                     wsgireq.env, wsgireq.req.bodyfh)
                                  try:
                                      # ensure caller gets private copy of ui
                                      repo = hg.repository(self.ui.copy(), real)
                                      return hgweb_mod.hgweb(repo).run_wsgi(wsgireq)
                                  except IOError as inst:
                                      msg = encoding.strtolocal(inst.strerror)
                                      raise ErrorResponse(HTTP_SERVER_ERROR, msg)
                                  except error.RepoError as inst:
                                      raise ErrorResponse(HTTP_SERVER_ERROR, bytes(inst))
                          # browse subdirectories
                          subdir = virtual + '/'
                          if [r for r in repos if r.startswith(subdir)]:
                              wsgireq.respond(HTTP_OK, ctype)
                              return self.makeindex(wsgireq, tmpl, subdir)
                          # prefixes not found
                          wsgireq.respond(HTTP_NOT_FOUND, ctype)
                          return tmpl("notfound", repo=virtual)
                      # NOTE(review): ctype and tmpl are first bound inside the try body;
                      # if an ErrorResponse were raised before those assignments this
                      # handler would fail on an unbound local - confirm that the
                      # earlier calls cannot raise ErrorResponse.
                      except ErrorResponse as err:
                          wsgireq.respond(err, ctype)
                          return tmpl('error', error=err.message or '')
                      finally:
                          # Clear the local reference to the templater on every exit path.
                          tmpl = None
                  def makeindex(self, wsgireq, tmpl, subdir=""):
                      """Render the 'index' template for the repositories under ``subdir``.

                      ``subdir`` is a name prefix (empty, or ending in '/' as passed by
                      the callers in this class); only repositories whose name starts
                      with it are listed. The optional 'sort' form field selects the
                      sort column; a leading '-' requests descending order.

                      Returns the result of ``tmpl("index", ...)``.
                      """
                      def archivelist(ui, nodeid, url):
                          """Return one dict per archive type enabled in ``ui``.

                          A type is enabled when it is listed in web.allow_archive or
                          when the boolean web.allow<type> is set.
                          """
                          allowed = ui.configlist("web", "allow_archive", untrusted=True)
                          archives = []
                          for typ, spec in hgweb_mod.archivespecs.iteritems():
                              if typ in allowed or ui.configbool("web", "allow" + typ,
                                                                  untrusted=True):
                                  archives.append({"type": typ, "extension": spec[2],
                                                   "node": nodeid, "url": url})
                          return archives
                      def rawentries(subdir="", **map):
                          """Yield an unsorted row dict per visible repository/directory.

                          Repositories that are hidden, unreadable for the requesting
                          user, or that cannot be opened are skipped.
                          """
                          descend = self.ui.configbool('web', 'descend')
                          collapse = self.ui.configbool('web', 'collapse')
                          # Names already emitted as repositories / as collapsed
                          # directories, used to avoid duplicate directory rows.
                          seenrepos = set()
                          seendirs = set()
                          for name, path in self.repos:
                              if not name.startswith(subdir):
                                  continue
                              name = name[len(subdir):]
                              directory = False
                              if '/' in name:
                                  # The repository lives below a subdirectory of subdir.
                                  if not descend:
                                      continue
                                  nameparts = name.split('/')
                                  rootname = nameparts[0]
                                  if not collapse:
                                      pass
                                  elif rootname in seendirs:
                                      continue
                                  elif rootname in seenrepos:
                                      pass
                                  else:
                                      directory = True
                                      name = rootname
                                      # redefine the path to refer to the directory
                                      discarded = '/'.join(nameparts[1:])
                                      # remove name parts plus accompanying slash
                                      path = path[:-len(discarded) - 1]
                                      # Probe whether the directory itself is a
                                      # repository; if so, list it as one. The
                                      # resulting object is not used here.
                                      try:
                                          r = hg.repository(self.ui, path)
                                          directory = False
                                      except (IOError, error.RepoError):
                                          pass
                              parts = [name]
                              parts.insert(0, '/' + subdir.rstrip('/'))
                              if wsgireq.env['SCRIPT_NAME']:
                                  parts.insert(0, wsgireq.env['SCRIPT_NAME'])
                              # Join the parts and collapse repeated slashes.
                              url = re.sub(r'/+', '/', '/'.join(parts) + '/')
                              # show either a directory entry or a repository
                              if directory:
                                  # get the directory's time information
                                  try:
                                      d = (get_mtime(path), dateutil.makedate()[1])
                                  except OSError:
                                      continue
                                  # add '/' to the name to make it obvious that
                                  # the entry is a directory, not a regular repository
                                  row = {'contact': "",
                                         'contact_sort': "",
                                         'name': name + '/',
                                         'name_sort': name,
                                         'url': url,
                                         'description': "",
                                         'description_sort': "",
                                         'lastchange': d,
                                         'lastchange_sort': d[1]-d[0],
                                         'archives': [],
                                         'isdirectory': True,
                                         'labels': [],
                                         }
                                  seendirs.add(name)
                                  yield row
                                  continue
                              # Per-repository ui: the shared ui plus the repo's own hgrc.
                              u = self.ui.copy()
                              try:
                                  u.readconfig(os.path.join(path, '.hg', 'hgrc'))
                              except Exception as e:
                                  u.warn(_('error reading %s/.hg/hgrc: %s\n') % (path, e))
                                  continue
                              def get(section, name, default=uimod._unset):
                                  return u.config(section, name, default, untrusted=True)
                              if u.configbool("web", "hidden", untrusted=True):
                                  continue
                              if not self.read_allowed(u, wsgireq):
                                  continue
                              # update time with local timezone
                              try:
                                  r = hg.repository(self.ui, path)
                              except IOError:
                                  u.warn(_('error accessing repository at %s\n') % path)
                                  continue
                              except error.RepoError:
                                  u.warn(_('error accessing repository at %s\n') % path)
                                  continue
                              try:
                                  d = (get_mtime(r.spath), dateutil.makedate()[1])
                              except OSError:
                                  continue
                              contact = get_contact(get)
                              description = get("web", "description")
                              # Record the path-derived name before web.name may
                              # replace it for display.
                              seenrepos.add(name)
                              name = get("web", "name", name)
                              row = {'contact': contact or "unknown",
                                     'contact_sort': contact.upper() or "unknown",
                                     'name': name,
                                     'name_sort': name,
                                     'url': url,
                                     'description': description or "unknown",
                                     'description_sort': description.upper() or "unknown",
                                     'lastchange': d,
                                     'lastchange_sort': d[1]-d[0],
                                     'archives': archivelist(u, "tip", url),
                                     'isdirectory': None,
                                     'labels': u.configlist('web', 'labels', untrusted=True),
                                     }
                              yield row
                      # (sortcolumn, descending) pair meaning "leave rows in natural order".
                      sortdefault = None, False
                      def entries(sortcolumn="", descending=False, subdir="", **map):
                          """Yield the rows of rawentries(), sorted if requested, each
                          tagged with a 'parity' value from paritygen()."""
                          rows = rawentries(subdir=subdir, **map)
                          if sortcolumn and sortdefault != (sortcolumn, descending):
                              sortkey = '%s_sort' % sortcolumn
                              rows = sorted(rows, key=lambda x: x[sortkey],
                                            reverse=descending)
                          for row, parity in zip(rows, paritygen(self.stripecount)):
                              row['parity'] = parity
                              yield row
                      self.refresh()
                      sortable = ["name", "description", "contact", "lastchange"]
                      sortcolumn, descending = sortdefault
                      if 'sort' in wsgireq.form:
                          sortcolumn = wsgireq.form['sort'][0]
                          descending = sortcolumn.startswith('-')
                          if descending:
                              sortcolumn = sortcolumn[1:]
                          # Unknown column names fall back to no sorting.
                          if sortcolumn not in sortable:
                              sortcolumn = ""
                      # One "sort_<column>" template keyword per sortable column; the
                      # value is the column name, prefixed with '-' when that column is
                      # currently sorted ascending.
                      sort = [("sort_%s" % column,
                               "%s%s" % ((not descending and column == sortcolumn)
                                          and "-" or "", column))
                              for column in sortable]
                      # NOTE(review): refresh() was already called above; whether this
                      # second call is needed is not evident from this method.
                      self.refresh()
                      self.updatereqenv(wsgireq.env)
                      return tmpl("index", entries=entries, subdir=subdir,
                                  pathdef=hgweb_mod.makebreadcrumb('/' + subdir, self.prefix),
                                  sortcolumn=sortcolumn, descending=descending,
                                  **dict(sort))
                  def templater(self, wsgireq, nonce):
                      """Build the templater used to render this request.

                      The request environment is first adjusted by updatereqenv(). The
                      style and map file come from hgweb_mod.getstyle(); the returned
                      templater is loaded from that map file with these default
                      keywords: encoding, motd, url, logourl, logoimg, staticurl,
                      sessionvars, style and nonce.
                      """
                      def config(section, name, default=uimod._unset, untrusted=True):
                          return self.ui.config(section, name, default, untrusted)
                      def motd(**kwargs):
                          # An explicitly set self.motd overrides the web.motd option.
                          if self.motd is None:
                              yield config('web', 'motd')
                          else:
                              yield self.motd
                      self.updatereqenv(wsgireq.env)
                      url = wsgireq.env.get('SCRIPT_NAME', '')
                      if not url.endswith('/'):
                          url += '/'
                      styles, (style, mapfile) = hgweb_mod.getstyle(wsgireq, config,
                                                                    self.templatepath)
                      # The style is only carried in the session variables when it is
                      # the first entry returned by getstyle().
                      stylevars = {}
                      if style == styles[0]:
                          stylevars['style'] = style
                      sessionvars = webutil.sessionvars(stylevars, r'?')
                      logourl = config('web', 'logourl')
                      logoimg = config('web', 'logoimg')
                      staticurl = config('web', 'staticurl')
                      if not staticurl:
                          staticurl = url + 'static/'
                      if not staticurl.endswith('/'):
                          staticurl += '/'
                      defaults = {
                          "encoding": encoding.encoding,
                          "motd": motd,
                          "url": url,
                          "logourl": logourl,
                          "logoimg": logoimg,
                          "staticurl": staticurl,
                          "sessionvars": sessionvars,
                          "style": style,
                          "nonce": nonce,
                      }
                      return templater.templater.frommapfile(mapfile, defaults=defaults)
                  def updatereqenv(self, env):
                      """Override SERVER_NAME/SERVER_PORT/SCRIPT_NAME from web.baseurl.

                      ``env`` is modified in place. Nothing happens when no baseurl is
                      configured (``self._baseurl`` is None).
                      """
                      if self._baseurl is None:
                          return
                      (env['SERVER_NAME'],
                       env['SERVER_PORT'],
                       env['SCRIPT_NAME']) = geturlcgivars(self._baseurl,
                                                           env['SERVER_PORT'])

mercurial/hgweb/request.py

0 +24 -13

              # hgweb/request.py - An http request from either CGI or the standalone server.
              #
              # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
              # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from __future__ import absolute_import
              import cgi
              import errno
              import socket
              import wsgiref.headers as wsgiheaders
              #import wsgiref.validate
              from .common import (
                  ErrorResponse,
                  HTTP_NOT_MODIFIED,
                  statusmessage,
              )
              from ..thirdparty import (
                  attr,
              )
              from .. import (
                  pycompat,
                  util,
              )
              # Abbreviated form keys and the (name, value) pairs each one expands to.
              # A value of None means "use the value that was supplied under the
              # abbreviated key"; see normalize() for how the table is applied.
              shortcuts = {
                  'cl': [('cmd', ['changelog']), ('rev', None)],
                  'sl': [('cmd', ['shortlog']), ('rev', None)],
                  'cs': [('cmd', ['changeset']), ('node', None)],
                  'f': [('cmd', ['file']), ('filenode', None)],
                  'fl': [('cmd', ['filelog']), ('filenode', None)],
                  'fd': [('cmd', ['filediff']), ('node', None)],
                  'fa': [('cmd', ['annotate']), ('filenode', None)],
                  'mf': [('cmd', ['manifest']), ('manifest', None)],
                  'ca': [('cmd', ['archive']), ('node', None)],
                  'tags': [('cmd', ['tags'])],
                  'tip': [('cmd', ['changeset']), ('node', ['tip'])],
                  'static': [('cmd', ['static']), ('file', None)]
              }
              def normalize(form):
                  """Expand shortcut keys in ``form`` and return a cleaned copy.

                  ``form`` maps keys to lists of values and is modified in place: each
                  key found in ``shortcuts`` is replaced by the entries it stands for.
                  The returned dict is new; its keys and whitespace-stripped values
                  have been passed through pycompat.bytesurl().
                  """
                  # first expand the shortcuts
                  for short in shortcuts:
                      if short not in form:
                          continue
                      for name, value in shortcuts[short]:
                          form[name] = form[short] if value is None else value
                      del form[short]
                  # And strip the values
                  return {
                      pycompat.bytesurl(key): [pycompat.bytesurl(item.strip())
                                               for item in values]
                      for key, values in form.iteritems()
                  }
              # Value class generated by attrs; frozen=True requests that instances
              # reject attribute assignment after construction. Every field below is
              # declared with a bare attr.ib(), i.e. without a default value.
              @attr.s(frozen=True)
              class parsedrequest(object):
-                 """Represents a parsed WSGI request / static HTTP request parameters."""
+                 """Represents a parsed WSGI request.
+                 Contains both parsed parameters as well as a handle on the input stream.
+                 """
                  # Request method.
                  method = attr.ib()
                  # Full URL for this request.
                  url = attr.ib()
                  # URL without any path components. Just <proto>://<host><port>.
                  baseurl = attr.ib()
                  # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
                  # of HTTP: Host header for hostname. This is likely what clients used.
                  advertisedurl = attr.ib()
                  advertisedbaseurl = attr.ib()
                  # WSGI application path.
                  apppath = attr.ib()
                  # List of path parts to be used for dispatch.
                  dispatchparts = attr.ib()
                  # URL path component (no query string) used for dispatch.
                  dispatchpath = attr.ib()
                  # Whether there is a path component to this request. This can be true
                  # when ``dispatchpath`` is empty due to REPO_NAME muckery.
                  havepathinfo = attr.ib()
                  # Raw query string (part after "?" in URL).
                  querystring = attr.ib()
                  # List of 2-tuples of query string arguments.
                  querystringlist = attr.ib()
                  # Dict of query string arguments. Values are lists with at least 1 item.
                  querystringdict = attr.ib()
                  # wsgiref.headers.Headers instance. Operates like a dict with case
                  # insensitive keys.
                  headers = attr.ib()
+                 # Request body input stream.
+                 bodyfh = attr.ib()
-             def parserequestfromenv(env):
+             def parserequestfromenv(env, bodyfh):
                  """Parse URL components from environment variables.
                  WSGI defines request attributes via environment variables. This function
                  parses the environment variables into a data structure.
                  """
                  # PEP-0333 defines the WSGI spec and is a useful reference for this code.
                  # We first validate that the incoming object conforms with the WSGI spec.
                  # We only want to be dealing with spec-conforming WSGI implementations.
                  # TODO enable this once we fix internal violations.
                  #wsgiref.validate.check_environ(env)
                  # PEP-0333 states that environment keys and values are native strings
                  # (bytes on Python 2 and str on Python 3). The code points for the Unicode
                  # strings on Python 3 must be between \00000-\000FF. We deal with bytes
                  # in Mercurial, so mass convert string keys and values to bytes.
                  if pycompat.ispy3:
                      env = {k.encode('latin-1'): v for k, v in env.iteritems()}
                      env = {k: v.encode('latin-1') if isinstance(v, str) else v
                             for k, v in env.iteritems()}
                  # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
                  # the environment variables.
                  # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
                  # how URLs are reconstructed.
                  fullurl = env['wsgi.url_scheme'] + '://'
                  advertisedfullurl = fullurl
                  # Append ":<SERVER_PORT>" unless it is the default port for the scheme
                  # (443 for https, 80 otherwise).
                  def addport(s):
                      if env['wsgi.url_scheme'] == 'https':
                          if env['SERVER_PORT'] != '443':
                              s += ':' + env['SERVER_PORT']
                      else:
                          if env['SERVER_PORT'] != '80':
                              s += ':' + env['SERVER_PORT']
                      return s
                  # The regular URL prefers the Host header; the advertised URL always
                  # uses SERVER_NAME plus port.
                  if env.get('HTTP_HOST'):
                      fullurl += env['HTTP_HOST']
                  else:
                      fullurl += env['SERVER_NAME']
                      fullurl = addport(fullurl)
                  advertisedfullurl += env['SERVER_NAME']
                  advertisedfullurl = addport(advertisedfullurl)
                  # Snapshot scheme + host (+ port) before any path is appended.
                  baseurl = fullurl
                  advertisedbaseurl = advertisedfullurl
                  fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
                  advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
                  fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
                  advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
                  if env.get('QUERY_STRING'):
                      fullurl += '?' + env['QUERY_STRING']
                      advertisedfullurl += '?' + env['QUERY_STRING']
                  # When dispatching requests, we look at the URL components (PATH_INFO
                  # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
                  # has the concept of "virtual" repositories. This is defined via REPO_NAME.
                  # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
                  # root. We also exclude its path components from PATH_INFO when resolving
                  # the dispatch path.
                  apppath = env['SCRIPT_NAME']
                  if env.get('REPO_NAME'):
                      if not apppath.endswith('/'):
                          apppath += '/'
                      apppath += env.get('REPO_NAME')
                  if 'PATH_INFO' in env:
                      dispatchparts = env['PATH_INFO'].strip('/').split('/')
                      # Strip out repo parts.
                      # Note: with REPO_NAME unset, repoparts is [''], which also matches
                      # (and removes) the leading empty part produced by an empty path.
                      repoparts = env.get('REPO_NAME', '').split('/')
                      if dispatchparts[:len(repoparts)] == repoparts:
                          dispatchparts = dispatchparts[len(repoparts):]
                  else:
                      dispatchparts = []
                  dispatchpath = '/'.join(dispatchparts)
                  querystring = env.get('QUERY_STRING', '')
                  # We store as a list so we have ordering information. We also store as
                  # a dict to facilitate fast lookup.
                  querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)
                  querystringdict = {}
                  for k, v in querystringlist:
                      if k in querystringdict:
                          querystringdict[k].append(v)
                      else:
                          querystringdict[k] = [v]
                  # HTTP_* keys contain HTTP request headers. The Headers structure should
                  # perform case normalization for us. We just rewrite underscore to dash
                  # so keys match what likely went over the wire.
                  headers = []
                  for k, v in env.iteritems():
                      if k.startswith('HTTP_'):
                          headers.append((k[len('HTTP_'):].replace('_', '-'), v))
                  headers = wsgiheaders.Headers(headers)
                  # This is kind of a lie because the HTTP header wasn't explicitly
                  # sent. But for all intents and purposes it should be OK to lie about
                  # this, since a consumer will use either value to determine how many
                  # bytes are available to read.
                  if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
                      headers['Content-Length'] = env['CONTENT_LENGTH']
+                 # TODO do this once we remove wsgirequest.inp, otherwise we could have
+                 # multiple readers from the underlying input stream.
+                 #bodyfh = env['wsgi.input']
+                 #if 'Content-Length' in headers:
+                 #    bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
                  # ``bodyfh`` is handed to the result as-is; this function never reads
                  # from it.
                  return parsedrequest(method=env['REQUEST_METHOD'],
                                       url=fullurl, baseurl=baseurl,
                                       advertisedurl=advertisedfullurl,
                                       advertisedbaseurl=advertisedbaseurl,
                                       apppath=apppath,
                                       dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                                       havepathinfo='PATH_INFO' in env,
                                       querystring=querystring,
                                       querystringlist=querystringlist,
                                       querystringdict=querystringdict,
-                                      headers=headers)
+                                      headers=headers,
+                                      bodyfh=bodyfh)
              class wsgirequest(object):
                  """Higher-level API for a WSGI request.
                  WSGI applications are invoked with 2 arguments. They are used to
                  instantiate instances of this class, which provides higher-level APIs
                  for obtaining request parameters, writing HTTP output, etc.
                  """
                  # ``wsgienv`` is the WSGI environment and ``start_response`` the WSGI
                  # callable used later by ``respond()``. Only WSGI 1.x is accepted.
                  def __init__(self, wsgienv, start_response):
                      version = wsgienv[r'wsgi.version']
                      if (version < (1, 0)) or (version >= (2, 0)):
                          raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                                             % version)
-                     self.inp = wsgienv[r'wsgi.input']
+                     inp = wsgienv[r'wsgi.input']
                      # Cap reads of the input stream at the declared content length.
                      # HTTP_CONTENT_LENGTH takes precedence over CONTENT_LENGTH.
                      if r'HTTP_CONTENT_LENGTH' in wsgienv:
-                         self.inp = util.cappedreader(self.inp,
-                                                      int(wsgienv[r'HTTP_CONTENT_LENGTH']))
+                         inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
                      elif r'CONTENT_LENGTH' in wsgienv:
-                         self.inp = util.cappedreader(self.inp,
-                                                      int(wsgienv[r'CONTENT_LENGTH']))
+                         inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
                      self.err = wsgienv[r'wsgi.errors']
                      self.threaded = wsgienv[r'wsgi.multithread']
                      self.multiprocess = wsgienv[r'wsgi.multiprocess']
                      self.run_once = wsgienv[r'wsgi.run_once']
                      self.env = wsgienv
-                     self.form = normalize(cgi.parse(self.inp,
+                     self.form = normalize(cgi.parse(inp,
                                                      self.env,
                                                      keep_blank_values=1))
                      self._start_response = start_response
                      # Set by respond() to the write callable returned by start_response.
                      self.server_write = None
                      # Pending response headers as (name, value) pairs.
                      self.headers = []
                      # The parsed request receives the same (possibly capped) stream that
                      # was handed to cgi.parse() above.
-                     self.req = parserequestfromenv(wsgienv)
+                     self.req = parserequestfromenv(wsgienv, inp)
                  # Send status and headers through ``start_response`` (first call only;
                  # ``_start_response`` is cleared afterwards), then write ``body`` if one
                  # was given. ``status`` may be an ErrorResponse, an int, or a string.
                  def respond(self, status, type, filename=None, body=None):
                      if not isinstance(type, str):
                          type = pycompat.sysstr(type)
                      if self._start_response is not None:
                          self.headers.append((r'Content-Type', type))
                          if filename:
                              # Keep only the last path component and escape backslashes
                              # and double quotes for the quoted filename parameter.
                              filename = (filename.rpartition('/')[-1]
                                          .replace('\\', '\\\\').replace('"', '\\"'))
                              self.headers.append(('Content-Disposition',
                                                   'inline; filename="%s"' % filename))
                          if body is not None:
                              self.headers.append((r'Content-Length', str(len(body))))
                          for k, v in self.headers:
                              if not isinstance(v, str):
                                  raise TypeError('header value must be string: %r' % (v,))
                          if isinstance(status, ErrorResponse):
                              self.headers.extend(status.headers)
                              if status.code == HTTP_NOT_MODIFIED:
                                  # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
                                  # it MUST NOT include any headers other than these and no
                                  # body
                                  self.headers = [(k, v) for (k, v) in self.headers if
                                                  k in ('Date', 'ETag', 'Expires',
                                                        'Cache-Control', 'Vary')]
                              status = statusmessage(status.code, pycompat.bytestr(status))
                          elif status == 200:
                              status = '200 Script output follows'
                          elif isinstance(status, int):
                              status = statusmessage(status)
                          # Various HTTP clients (notably httplib) won't read the HTTP
                          # response until the HTTP request has been sent in full. If servers
                          # (us) send a response before the HTTP request has been fully sent,
                          # the connection may deadlock because neither end is reading.
                          #
                          # We work around this by "draining" the request data before
                          # sending any response in some conditions.
                          drain = False
                          close = False
                          # If the client sent Expect: 100-continue, we assume it is smart
                          # enough to deal with the server sending a response before reading
                          # the request. (httplib doesn't do this.)
                          if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
                              pass
                          # Only tend to request methods that have bodies. Strictly speaking,
                          # we should sniff for a body. But this is fine for our existing
                          # WSGI applications.
                          elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
                              pass
                          else:
                              # If we don't know how much data to read, there's no guarantee
                              # that we can drain the request responsibly. The WSGI
                              # specification only says that servers *should* ensure the
                              # input stream doesn't overrun the actual request. So there's
                              # no guarantee that reading until EOF won't corrupt the stream
                              # state.
-                             if not isinstance(self.inp, util.cappedreader):
+                             if not isinstance(self.req.bodyfh, util.cappedreader):
                                  close = True
                              else:
                                  # We /could/ only drain certain HTTP response codes. But 200
                                  # and non-200 wire protocol responses both require draining.
                                  # Since we have a capped reader in place for all situations
                                  # where we drain, it is safe to read from that stream. We'll
                                  # either do a drain or no-op if we're already at EOF.
                                  drain = True
                          if close:
                              self.headers.append((r'Connection', r'Close'))
                          if drain:
-                             assert isinstance(self.inp, util.cappedreader)
+                             assert isinstance(self.req.bodyfh, util.cappedreader)
                              # Read and discard the remaining request body in 32k chunks.
                              while True:
-                                 chunk = self.inp.read(32768)
+                                 chunk = self.req.bodyfh.read(32768)
                                  if not chunk:
                                      break
                          self.server_write = self._start_response(
                              pycompat.sysstr(status), self.headers)
                          self._start_response = None
                          self.headers = []
                      if body is not None:
                          self.write(body)
                          self.server_write = None
                  # Pass ``thing`` to the server's write callable; falsy values are
                  # skipped. A socket.error whose first item is ECONNRESET is ignored,
                  # any other socket.error is re-raised.
                  def write(self, thing):
                      if thing:
                          try:
                              self.server_write(thing)
                          except socket.error as inst:
                              if inst[0] != errno.ECONNRESET:
                                  raise
                  # Nothing is buffered by this class; always returns None.
                  def flush(self):
                      return None
              def wsgiapplication(app_maker):
                  """Build the application once and wrap it in a WSGI callable.
                  Exists for compatibility with old CGI scripts; a plain hgweb() or
                  hgwebdir() can and should now be used as a WSGI application directly.
                  ``app_maker`` is called exactly once, when this function runs. The
                  returned callable forwards ``(env, respond)`` to the built application
                  and returns its result.
                  """
                  wsgiapp = app_maker()
                  def run_wsgi(env, respond):
                      # Pure pass-through to the application created above.
                      result = wsgiapp(env, respond)
                      return result
                  return run_wsgi

mercurial/wireprotoserver.py

0 +2 -2

              # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
              # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from __future__ import absolute_import
              import contextlib
              import struct
              import sys
              import threading
              from .i18n import _
              from . import (
                  encoding,
                  error,
                  hook,
                  pycompat,
                  util,
                  wireproto,
                  wireprototypes,
              )
              # Short aliases for helpers provided by ``util``.
              stringio = util.stringio
              urlerr = util.urlerr
              urlreq = util.urlreq
              # Status passed to ``wsgireq.respond()`` for wire protocol responses.
              HTTP_OK = 200
              # Content types passed to ``wsgireq.respond()`` in this module.
              HGTYPE = 'application/mercurial-0.1'
              HGTYPE2 = 'application/mercurial-0.2'
              HGERRTYPE = 'application/hg-error'
              # Aliases of the ``wireprototypes`` constants that the SSH protocol
              # handlers return as their ``name``.
              SSHV1 = wireprototypes.SSHV1
              SSHV2 = wireprototypes.SSHV2
              def decodevaluefromheaders(req, headerprefix):
                  """Reassemble a long value that was split across HTTP request headers.
                  Headers named ``<headerprefix>-1``, ``<headerprefix>-2``, ... are read
                  from ``req.headers`` in order until the first missing one; their values
                  are concatenated.
                  Returns the value as a bytes, not a str.
                  """
                  pieces = []
                  index = 1
                  value = req.headers.get(b'%s-%d' % (headerprefix, index))
                  # Stop at the first gap in the numbering.
                  while value is not None:
                      pieces.append(pycompat.bytesurl(value))
                      index += 1
                      value = req.headers.get(b'%s-%d' % (headerprefix, index))
                  return ''.join(pieces)
              # Protocol handler for requests arriving over HTTP; its ``name`` is
              # 'http-v1'. It reads arguments and payload from the ``wsgirequest``
              # (``wsgireq``) and ``parsedrequest`` (``req``) objects it is given.
              class httpv1protocolhandler(wireprototypes.baseprotocolhandler):
                  def __init__(self, wsgireq, req, ui, checkperm):
                      self._wsgireq = wsgireq
                      self._req = req
                      self._ui = ui
                      self._checkperm = checkperm
                  @property
                  def name(self):
                      return 'http-v1'
                  # Return the values of the whitespace-separated names in ``args``, in
                  # the order given. The special name '*' yields a dict holding every
                  # other known argument except 'cmd'. Only the first value of each
                  # argument is used.
                  def getargs(self, args):
                      knownargs = self._args()
                      data = {}
                      keys = args.split()
                      for k in keys:
                          if k == '*':
                              star = {}
                              for key in knownargs.keys():
                                  if key != 'cmd' and key not in keys:
                                      star[key] = knownargs[key][0]
                              data['*'] = star
                          else:
                              data[k] = knownargs[k][0]
                      return [data[k] for k in keys]
                  # Gather arguments from the parsed form, then add those sent either in
                  # the request body (when X-HgArgs-Post gives a non-zero length) or in
                  # the numbered X-HgArg-<N> headers.
                  def _args(self):
                      args = util.rapply(pycompat.bytesurl, self._wsgireq.form.copy())
                      postlen = int(self._req.headers.get(b'X-HgArgs-Post', 0))
                      if postlen:
                          args.update(urlreq.parseqs(
-                             self._wsgireq.inp.read(postlen), keep_blank_values=True))
+                             self._req.bodyfh.read(postlen), keep_blank_values=True))
                          return args
                      argvalue = decodevaluefromheaders(self._req, b'X-HgArg')
                      args.update(urlreq.parseqs(argvalue, keep_blank_values=True))
                      return args
                  # Copy the request payload into ``fp``.
                  def forwardpayload(self, fp):
                      # Existing clients *always* send Content-Length.
                      length = int(self._req.headers[b'Content-Length'])
                      # If httppostargs is used, we need to read Content-Length
                      # minus the amount that was consumed by args.
                      length -= int(self._req.headers.get(b'X-HgArgs-Post', 0))
-                     for s in util.filechunkiter(self._wsgireq.inp, limit=length):
+                     for s in util.filechunkiter(self._req.bodyfh, limit=length):
                          fp.write(s)
                  # Point ui.fout and ui.ferr at one in-memory buffer for the duration of
                  # the ``with`` block and yield that buffer; the original streams are
                  # restored on exit, even on error.
                  @contextlib.contextmanager
                  def mayberedirectstdio(self):
                      oldout = self._ui.fout
                      olderr = self._ui.ferr
                      out = util.stringio()
                      try:
                          self._ui.fout = out
                          self._ui.ferr = out
                          yield out
                      finally:
                          self._ui.fout = oldout
                          self._ui.ferr = olderr
                  # Describe the client as 'remote:<scheme>:<host>:<user>' using the WSGI
                  # environment; host and user are URL-quoted and default to ''.
                  def client(self):
                      return 'remote:%s:%s:%s' % (
                          self._wsgireq.env.get('wsgi.url_scheme') or 'http',
                          urlreq.quote(self._wsgireq.env.get('REMOTE_HOST', '')),
                          urlreq.quote(self._wsgireq.env.get('REMOTE_USER', '')))
                  # Append the HTTP-specific capabilities to ``caps`` (mutated in place)
                  # and return it.
                  def addcapabilities(self, repo, caps):
                      caps.append('httpheader=%d' %
                                  repo.ui.configint('server', 'maxhttpheaderlen'))
                      if repo.ui.configbool('experimental', 'httppostargs'):
                          caps.append('httppostargs')
                      # FUTURE advertise 0.2rx once support is implemented
                      # FUTURE advertise minrx and mintx after consulting config option
                      caps.append('httpmediatype=0.1rx,0.1tx,0.2tx')
                      compengines = wireproto.supportedcompengines(repo.ui, util.SERVERROLE)
                      if compengines:
                          comptypes = ','.join(urlreq.quote(e.wireprotosupport().name)
                                               for e in compengines)
                          caps.append('compression=%s' % comptypes)
                      return caps
                  # Delegate to the permission checker supplied at construction time.
                  def checkperm(self, perm):
                      return self._checkperm(perm)
              # This method exists mostly so that extensions like remotefilelog can
              # disable a kludgey legacy method only over http. As of early 2018,
              # there are no other known users, so with any luck we can discard this
              # hook if remotefilelog becomes a first-party extension.
              def iscmd(cmd):
                  """Return whether ``cmd`` is contained in ``wireproto.commands``."""
                  return cmd in wireproto.commands
              def handlewsgirequest(rctx, wsgireq, req, checkperm):
                  """Serve the request through the wire protocol layer if it is one.
                  ``wsgireq`` is a ``wsgirequest`` instance.
                  ``req`` is a ``parsedrequest`` instance.
                  Returns a 2-tuple ``(handled, response)``: ``handled`` tells whether
                  this function dealt with the request, ``response`` is a return value
                  for a WSGI application (often a generator of bytes) and is ``None``
                  when the request was not handled.
                  """
                  # Imported here to avoid a cycle involving the hg module.
                  from .hgweb import common as hgwebcommon
                  repo = rctx.repo
                  qsargs = req.querystringdict
                  # Version 1 HTTP wire protocol requests carry a "cmd" query string
                  # parameter; without it there is nothing for us to do.
                  if 'cmd' not in qsargs:
                      return False, None
                  cmd = qsargs['cmd'][0]
                  # hgweb uses "cmd" as well. Anything that looks like a known wire
                  # protocol command is routed to a protocol handler, even if that
                  # command is not available on this transport: it keeps hgweb from
                  # claiming wire protocol command names and is less confusing for
                  # machine clients.
                  if not iscmd(cmd):
                      return False, None
                  # "cmd" is only valid on the repo root (``/?cmd=foo``,
                  # ``/repo?cmd=foo``), not on paths inside it (``/blah?cmd=foo``). For
                  # backwards compatibility the latter gets an HTTP 404.
                  if req.dispatchpath:
                      notfound = hgwebcommon.ErrorResponse(hgwebcommon.HTTP_NOT_FOUND)
                      return True, _handlehttperror(notfound, wsgireq, req)
                  def permcheck(perm):
                      return checkperm(rctx, wsgireq, perm)
                  proto = httpv1protocolhandler(wsgireq, req, repo.ui, permcheck)
                  # Only the permissions checker should be able to raise an
                  # ErrorResponse. Catching an hgweb exception here is something of a
                  # layer violation; consider introducing an exception type that belongs
                  # to the wire protocol instead.
                  try:
                      return True, _callhttp(repo, wsgireq, req, proto, cmd)
                  except hgwebcommon.ErrorResponse as err:
                      return True, _handlehttperror(err, wsgireq, req)
              def _httpresponsetype(ui, req, prefer_uncompressed):
                  """Choose the response media type and compression for a request.
                  The decision is driven by the capabilities the client listed in its
                  X-HgProto headers.
                  Returns a tuple of (mediatype, compengine, engineopts).
                  """
                  protocaps = decodevaluefromheaders(req, 'X-HgProto').split(' ')
                  if '0.2' in protocaps:
                      # Every client is expected to cope with uncompressed data.
                      if prefer_uncompressed:
                          return HGTYPE2, util._noopengine(), {}
                      # Take the formats from the first "comp=" capability; otherwise
                      # use the default defined by the wire protocol spec.
                      compformats = next((cap[5:].split(',') for cap in protocaps
                                          if cap.startswith('comp=')),
                                         ['zlib', 'none'])
                      # Pick the first server engine the client also accepts.
                      for engine in wireproto.supportedcompengines(ui, util.SERVERROLE):
                          if engine.wireprotosupport().name not in compformats:
                              continue
                          level = ui.configint('server', '%slevel' % engine.name())
                          engineopts = {} if level is None else {'level': level}
                          return HGTYPE2, engine, engineopts
                      # Nothing in common: fall through to the legacy protocol.
                  # Untrusted settings are not honoured here, because disabling
                  # compression or asking for a very high compression level could
                  # flood the server's network or CPU.
                  legacyopts = {'level': ui.configint('server', 'zliblevel')}
                  return HGTYPE, util.compengines['zlib'], legacyopts
              def _callhttp(repo, wsgireq, req, proto, cmd):
                  """Run wire protocol command ``cmd`` and start the HTTP response.
                  The command is dispatched through ``wireproto`` and its result is
                  translated into a ``wsgireq.respond()`` call. Returns the iterable
                  of body chunks that remain to be sent: an empty list when the body
                  was already passed to ``respond()``, otherwise a generator.
                  Raises ``error.ProgrammingError`` for an unrecognised result type.
                  """
                  def genversion2(chunks, engine, engineopts):
                      # An application/mercurial-0.2 payload always starts with a
                      # header naming the compression engine.
                      enginename = engine.wireprotosupport().name
                      assert 0 < len(enginename) < 256
                      yield struct.pack('B', len(enginename))
                      yield enginename
                      for chunk in chunks:
                          yield chunk
                  def sendbody(mediatype, data):
                      # Whole body is known: send it now, nothing left to stream.
                      wsgireq.respond(HTTP_OK, mediatype, body=data)
                      return []
                  if not wireproto.commands.commandavailable(cmd, proto):
                      return sendbody(HGERRTYPE,
                                      _('requested wire protocol command is not '
                                        'available over HTTP'))
                  proto.checkperm(wireproto.commands[cmd].permission)
                  rsp = wireproto.dispatch(repo, proto, cmd)
                  if isinstance(rsp, bytes):
                      return sendbody(HGTYPE, rsp)
                  if isinstance(rsp, wireprototypes.bytesresponse):
                      return sendbody(HGTYPE, rsp.data)
                  if isinstance(rsp, wireprototypes.streamreslegacy):
                      stream = rsp.gen
                      wsgireq.respond(HTTP_OK, HGTYPE)
                      return stream
                  if isinstance(rsp, wireprototypes.streamres):
                      stream = rsp.gen
                      # Compression is not inherently tied to streamres; it lives
                      # here because streamres is all we compress at the moment.
                      mediatype, engine, engineopts = _httpresponsetype(
                          repo.ui, req, rsp.prefer_uncompressed)
                      stream = engine.compressstream(stream, engineopts)
                      if mediatype == HGTYPE2:
                          stream = genversion2(stream, engine, engineopts)
                      wsgireq.respond(HTTP_OK, mediatype)
                      return stream
                  if isinstance(rsp, wireprototypes.pushres):
                      return sendbody(HGTYPE, '%d\n%s' % (rsp.res, rsp.output))
                  if isinstance(rsp, wireprototypes.pusherr):
                      return sendbody(HGTYPE, '0\n%s\n' % rsp.res)
                  if isinstance(rsp, wireprototypes.ooberror):
                      return sendbody(HGERRTYPE, rsp.message)
                  raise error.ProgrammingError('hgweb.protocol internal failure', rsp)
              def _handlehttperror(e, wsgireq, req):
                  """Send the response for an ErrorResponse raised while handling HTTP.
                  ``e`` is used both as the response status and, converted to bytes, as
                  part of the body. Always returns an empty string.
                  """
                  # TODO The body format assumes the failed command was "unbundle",
                  # which is not always the case.
                  errbody = '0\n%s\n' % pycompat.bytestr(e)
                  wsgireq.respond(e, HGTYPE, body=errbody)
                  return ''
              def _sshv1respondbytes(fout, value):
                  """Send a bytes response for protocol version 1."""
                  fout.write('%d\n' % len(value))
                  fout.write(value)
                  fout.flush()
              def _sshv1respondstream(fout, source):
                  write = fout.write
                  for chunk in source.gen:
                      write(chunk)
                  fout.flush()
              def _sshv1respondooberror(fout, ferr, rsp):
                  ferr.write(b'%s\n-\n' % rsp)
                  ferr.flush()
                  fout.write(b'\n')
                  fout.flush()
              class sshv1protocolhandler(wireprototypes.baseprotocolhandler):
                  """Handler for requests serviced via version 1 of the SSH protocol."""
                  def __init__(self, ui, fin, fout):
                      self._ui = ui
                      self._fin = fin
                      self._fout = fout
                  @property
                  def name(self):
                      return wireprototypes.SSHV1
                  def getargs(self, args):
                      """Read the arguments named in ``args`` from the input stream.
                      One ``<name> <size>`` line is read per expected name, followed by
                      ``size`` bytes of value. For the special name '*', the size is a
                      count of nested name/value entries, which are returned as a dict.
                      Returns the values in the order the names appear in ``args``.
                      Raises ``error.Abort`` when an unexpected name is received.
                      """
                      fin = self._fin
                      keys = args.split()
                      data = {}
                      for _unused in keys:
                          arg, size = fin.readline()[:-1].split()
                          if arg not in keys:
                              raise error.Abort(_("unexpected parameter %r") % arg)
                          if arg != '*':
                              data[arg] = fin.read(int(size))
                              continue
                          star = {}
                          for _entry in xrange(int(size)):
                              subarg, subsize = fin.readline()[:-1].split()
                              star[subarg] = fin.read(int(subsize))
                          data['*'] = star
                      return [data[k] for k in keys]
                  def forwardpayload(self, fpout):
                      """Copy the chunked payload sent by the client into ``fpout``."""
                      # An empty response is sent first. It tells the client it may
                      # start sending data; any other response is taken as an error.
                      _sshv1respondbytes(self._fout, b'')
                      # The payload has the form:
                      #
                      # <chunk size>\n<chunk>
                      # ...
                      # 0\n
                      while True:
                          count = int(self._fin.readline())
                          if not count:
                              break
                          fpout.write(self._fin.read(count))
                  @contextlib.contextmanager
                  def mayberedirectstdio(self):
                      """Yield ``None``; nothing is redirected by this handler."""
                      yield None
                  def client(self):
                      """Return 'remote:ssh:' plus the first field of SSH_CLIENT."""
                      sshclient = encoding.environ.get('SSH_CLIENT', '')
                      return 'remote:ssh:' + sshclient.split(' ', 1)[0]
                  def addcapabilities(self, repo, caps):
                      """Return ``caps`` unchanged."""
                      return caps
                  def checkperm(self, perm):
                      """Do nothing; no permission check is made here."""
              class sshv2protocolhandler(sshv1protocolhandler):
                  """Protocol handler for version 2 of the SSH protocol.

                  Only the reported protocol name is overridden; everything else is
                  inherited from ``sshv1protocolhandler``.
                  """

                  @property
                  def name(self):
                      """Protocol identifier reported by this handler."""
                      return wireprototypes.SSHV2
              def _runsshserver(ui, repo, fin, fout, ev):
                  """Serve SSH wire protocol requests read from ``fin`` until told to stop.

                  Responses are written to ``fout`` and out-of-band errors to ``ui.ferr``.
                  The loop ends when ``ev`` is set (checked with ``ev.is_set()`` before
                  every step) or when the state machine described below reaches the
                  ``shutdown`` state.
                  """
                  # This function operates like a state machine of sorts. The following
                  # states are defined:
                  #
                  # protov1-serving
                  #    Server is in protocol version 1 serving mode. Commands arrive on
                  #    new lines. These commands are processed in this state, one command
                  #    after the other.
                  #
                  # protov2-serving
                  #    Server is in protocol version 2 serving mode.
                  #
                  # upgrade-initial
                  #    The server is going to process an upgrade request.
                  #
                  # upgrade-v2-filter-legacy-handshake
                  #    The protocol is being upgraded to version 2. The server is expecting
                  #    the legacy handshake from version 1.
                  #
                  # upgrade-v2-finish
                  #    The upgrade to version 2 of the protocol is imminent.
                  #
                  # shutdown
                  #    The server is shutting down, possibly in reaction to a client event.
                  #
                  # And here are their transitions:
                  #
                  # protov1-serving -> shutdown
                  #    When server receives an empty request or encounters another
                  #    error.
                  #
                  # protov1-serving -> upgrade-initial
                  #    An upgrade request line was seen.
                  #
                  # upgrade-initial -> upgrade-v2-filter-legacy-handshake
                  #    Upgrade to version 2 in progress. Server is expecting to
                  #    process a legacy handshake.
                  #
                  # upgrade-v2-filter-legacy-handshake -> shutdown
                  #    Client did not fulfill upgrade handshake requirements.
                  #
                  # upgrade-v2-filter-legacy-handshake -> upgrade-v2-finish
                  #    Client fulfilled version 2 upgrade requirements. Finishing that
                  #    upgrade.
                  #
                  # upgrade-v2-finish -> protov2-serving
                  #    Protocol upgrade to version 2 complete. Server can now speak protocol
                  #    version 2.
                  #
                  # protov2-serving -> protov1-serving
                  #    This happens by default since protocol version 2 is the same as
                  #    version 1 except for the handshake.
                  state = 'protov1-serving'
                  proto = sshv1protocolhandler(ui, fin, fout)
                  # Set once the upgrade to version 2 has completed; a second upgrade
                  # request is then rejected.
                  protoswitched = False
                  while not ev.is_set():
                      if state == 'protov1-serving':
                          # Commands are issued on new lines.
                          request = fin.readline()[:-1]
                          # Empty lines signal to terminate the connection.
                          if not request:
                              state = 'shutdown'
                              continue
                          # It looks like a protocol upgrade request. Transition state to
                          # handle it.
                          if request.startswith(b'upgrade '):
                              if protoswitched:
                                  _sshv1respondooberror(fout, ui.ferr,
                                                        b'cannot upgrade protocols multiple '
                                                        b'times')
                                  state = 'shutdown'
                                  continue
                              state = 'upgrade-initial'
                              continue
                          available = wireproto.commands.commandavailable(request, proto)
                          # This command isn't available. Send an empty response and go
                          # back to waiting for a new command.
                          if not available:
                              _sshv1respondbytes(fout, b'')
                              continue
                          rsp = wireproto.dispatch(repo, proto, request)
                          # Pick the output routine matching the type of the response.
                          if isinstance(rsp, bytes):
                              _sshv1respondbytes(fout, rsp)
                          elif isinstance(rsp, wireprototypes.bytesresponse):
                              _sshv1respondbytes(fout, rsp.data)
                          elif isinstance(rsp, wireprototypes.streamres):
                              _sshv1respondstream(fout, rsp)
                          elif isinstance(rsp, wireprototypes.streamreslegacy):
                              _sshv1respondstream(fout, rsp)
                          elif isinstance(rsp, wireprototypes.pushres):
                              # An empty response is sent first, then the result
                              # formatted as a decimal integer.
                              _sshv1respondbytes(fout, b'')
                              _sshv1respondbytes(fout, b'%d' % rsp.res)
                          elif isinstance(rsp, wireprototypes.pusherr):
                              _sshv1respondbytes(fout, rsp.res)
                          elif isinstance(rsp, wireprototypes.ooberror):
                              _sshv1respondooberror(fout, ui.ferr, rsp.message)
                          else:
                              raise error.ProgrammingError('unhandled response type from '
                                                           'wire protocol command: %s' % rsp)
                      # For now, protocol version 2 serving just goes back to version 1.
                      elif state == 'protov2-serving':
                          state = 'protov1-serving'
                          continue
                      elif state == 'upgrade-initial':
                          # We should never transition into this state if we've switched
                          # protocols.
                          assert not protoswitched
                          assert proto.name == wireprototypes.SSHV1
                          # Expected: upgrade <token> <capabilities>
                          # If we get something else, the request is malformed. It could be
                          # from a future client that has altered the upgrade line content.
                          # We treat this as an unknown command.
                          try:
                              # ``request`` still holds the upgrade line read in the
                              # protov1-serving state.
                              token, caps = request.split(b' ')[1:]
                          except ValueError:
                              _sshv1respondbytes(fout, b'')
                              state = 'protov1-serving'
                              continue
                          # Send empty response if we don't support upgrading protocols.
                          if not ui.configbool('experimental', 'sshserver.support-v2'):
                              _sshv1respondbytes(fout, b'')
                              state = 'protov1-serving'
                              continue
                          try:
                              caps = urlreq.parseqs(caps)
                          except ValueError:
                              _sshv1respondbytes(fout, b'')
                              state = 'protov1-serving'
                              continue
                          # We don't see an upgrade request to protocol version 2. Ignore
                          # the upgrade request.
                          wantedprotos = caps.get(b'proto', [b''])[0]
                          if SSHV2 not in wantedprotos:
                              _sshv1respondbytes(fout, b'')
                              state = 'protov1-serving'
                              continue
                          # It looks like we can honor this upgrade request to protocol 2.
                          # Filter the rest of the handshake protocol request lines.
                          state = 'upgrade-v2-filter-legacy-handshake'
                          continue
                      elif state == 'upgrade-v2-filter-legacy-handshake':
                          # Client should have sent legacy handshake after an ``upgrade``
                          # request. Expected lines:
                          #
                          #    hello
                          #    between
                          #    pairs 81
                          #    0000...-0000...
                          ok = True
                          for line in (b'hello', b'between', b'pairs 81'):
                              request = fin.readline()[:-1]
                              if request != line:
                                  _sshv1respondooberror(fout, ui.ferr,
                                                        b'malformed handshake protocol: '
                                                        b'missing %s' % line)
                                  ok = False
                                  state = 'shutdown'
                                  break
                          # ``break`` only leaves the inner ``for`` loop; re-enter the
                          # outer loop so the shutdown state is processed.
                          if not ok:
                              continue
                          # 81 bytes: two runs of 40 ``0`` characters joined by ``-``.
                          request = fin.read(81)
                          if request != b'%s-%s' % (b'0' * 40, b'0' * 40):
                              _sshv1respondooberror(fout, ui.ferr,
                                                    b'malformed handshake protocol: '
                                                    b'missing between argument value')
                              state = 'shutdown'
                              continue
                          state = 'upgrade-v2-finish'
                          continue
                      elif state == 'upgrade-v2-finish':
                          # Send the upgrade response.
                          # ``token`` was captured from the upgrade request line in the
                          # upgrade-initial state.
                          fout.write(b'upgraded %s %s\n' % (token, SSHV2))
                          servercaps = wireproto.capabilities(repo, proto)
                          rsp = b'capabilities: %s' % servercaps.data
                          fout.write(b'%d\n%s\n' % (len(rsp), rsp))
                          fout.flush()
                          proto = sshv2protocolhandler(ui, fin, fout)
                          protoswitched = True
                          state = 'protov2-serving'
                          continue
                      elif state == 'shutdown':
                          break
                      else:
                          raise error.ProgrammingError('unhandled ssh server state: %s' %
                                                       state)
              class sshserver(object):
                  """Serve the wire protocol over the input and output streams of a ui."""

                  def __init__(self, ui, repo, logfh=None):
                      """Capture the ui streams and prepare them for serving.

                      If ``logfh`` is truthy, the captured output stream and ``ui.ferr``
                      are wrapped with ``util.makeloggingfileobject`` so that what is
                      written to them is logged to ``logfh``.
                      """
                      self._ui = ui
                      self._repo = repo

                      instream = ui.fin
                      outstream = ui.fout

                      # Log write I/O to stdout and stderr if configured.
                      if logfh:
                          outstream = util.makeloggingfileobject(
                              logfh, outstream, 'o', logdata=True)
                          ui.ferr = util.makeloggingfileobject(
                              logfh, ui.ferr, 'e', logdata=True)

                      self._fin = instream
                      self._fout = outstream

                      hook.redirect(True)
                      # Point the ui output streams at stderr; the protocol itself keeps
                      # using the output stream captured above.
                      errstream = ui.ferr
                      ui.fout = errstream
                      repo.ui.fout = errstream

                      # Prevent insertion/deletion of CRs
                      util.setbinary(instream)
                      util.setbinary(outstream)

                  def serve_forever(self):
                      """Serve with a fresh ``threading.Event``, then exit with status 0."""
                      stopevent = threading.Event()
                      self.serveuntil(stopevent)
                      sys.exit(0)

                  def serveuntil(self, ev):
                      """Serve until a threading.Event is set."""
                      _runsshserver(self._ui, self._repo, self._fin, self._fout, ev)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages