httpconnection.py
287 lines
| 10.7 KiB
| text/x-python
|
PythonLexer
/ mercurial / httpconnection.py
Augie Fackler
|
r14244 | # httpconnection.py - urllib2 handler for new http support | ||
# | ||||
# Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com> | ||||
# Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br> | ||||
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> | ||||
# Copyright 2011 Google, Inc. | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
import logging | ||||
import socket | ||||
import urllib | ||||
import urllib2 | ||||
import os | ||||
from mercurial import httpclient | ||||
from mercurial import sslutil | ||||
from mercurial import util | ||||
from mercurial.i18n import _ | ||||
# moved here from url.py to avoid a cycle | ||||
class httpsendfile(object): | ||||
"""This is a wrapper around the objects returned by python's "open". | ||||
Mads Kiilerich
|
r15152 | Its purpose is to send file-like objects via HTTP. | ||
It do however not define a __len__ attribute because the length | ||||
might be more than Py_ssize_t can handle. | ||||
Augie Fackler
|
r14244 | """ | ||
def __init__(self, ui, *args, **kwargs): | ||||
# We can't just "self._data = open(*args, **kwargs)" here because there | ||||
# is an "open" function defined in this module that shadows the global | ||||
# one | ||||
self.ui = ui | ||||
self._data = open(*args, **kwargs) | ||||
self.seek = self._data.seek | ||||
self.close = self._data.close | ||||
self.write = self._data.write | ||||
Mads Kiilerich
|
r15152 | self.length = os.fstat(self._data.fileno()).st_size | ||
Augie Fackler
|
r14244 | self._pos = 0 | ||
Martin Geisler
|
r15791 | self._total = self.length // 1024 * 2 | ||
Augie Fackler
|
r14244 | |||
def read(self, *args, **kwargs): | ||||
try: | ||||
ret = self._data.read(*args, **kwargs) | ||||
except EOFError: | ||||
self.ui.progress(_('sending'), None) | ||||
self._pos += len(ret) | ||||
# We pass double the max for total because we currently have | ||||
# to send the bundle twice in the case of a server that | ||||
# requires authentication. Since we can't know until we try | ||||
# once whether authentication will be required, just lie to | ||||
# the user and maybe the push succeeds suddenly at 50%. | ||||
Martin Geisler
|
r15791 | self.ui.progress(_('sending'), self._pos // 1024, | ||
Augie Fackler
|
r14244 | unit=_('kb'), total=self._total) | ||
return ret | ||||
# moved here from url.py to avoid a cycle | ||||
Patrick Mezard
|
r15025 | def readauthforuri(ui, uri, user): | ||
Augie Fackler
|
r14244 | # Read configuration | ||
config = dict() | ||||
for key, val in ui.configitems('auth'): | ||||
if '.' not in key: | ||||
ui.warn(_("ignoring invalid [auth] key '%s'\n") % key) | ||||
continue | ||||
group, setting = key.rsplit('.', 1) | ||||
gdict = config.setdefault(group, dict()) | ||||
if setting in ('username', 'cert', 'key'): | ||||
val = util.expandpath(val) | ||||
gdict[setting] = val | ||||
# Find the best match | ||||
Matt Mackall
|
r15288 | if '://' in uri: | ||
scheme, hostpath = uri.split('://', 1) | ||||
else: | ||||
Mads Kiilerich
|
r17428 | # Python 2.4.1 doesn't provide the full URI | ||
Matt Mackall
|
r15288 | scheme, hostpath = 'http', uri | ||
Patrick Mezard
|
r15005 | bestuser = None | ||
Augie Fackler
|
r14244 | bestlen = 0 | ||
bestauth = None | ||||
for group, auth in config.iteritems(): | ||||
Patrick Mezard
|
r15005 | if user and user != auth.get('username', user): | ||
# If a username was set in the URI, the entry username | ||||
# must either match it or be unset | ||||
continue | ||||
Augie Fackler
|
r14244 | prefix = auth.get('prefix') | ||
if not prefix: | ||||
continue | ||||
p = prefix.split('://', 1) | ||||
if len(p) > 1: | ||||
schemes, prefix = [p[0]], p[1] | ||||
else: | ||||
schemes = (auth.get('schemes') or 'https').split() | ||||
if (prefix == '*' or hostpath.startswith(prefix)) and \ | ||||
Patrick Mezard
|
r15005 | (len(prefix) > bestlen or (len(prefix) == bestlen and \ | ||
not bestuser and 'username' in auth)) \ | ||||
and scheme in schemes: | ||||
Augie Fackler
|
r14244 | bestlen = len(prefix) | ||
bestauth = group, auth | ||||
Patrick Mezard
|
r15005 | bestuser = auth.get('username') | ||
if user and not bestuser: | ||||
auth['username'] = user | ||||
Augie Fackler
|
r14244 | return bestauth | ||
# Mercurial (at least until we can remove the old codepath) requires | ||||
# that the http response object be sufficiently file-like, so we | ||||
# provide a close() method here. | ||||
class HTTPResponse(httpclient.HTTPResponse): | ||||
def close(self): | ||||
pass | ||||
class HTTPConnection(httpclient.HTTPConnection): | ||||
response_class = HTTPResponse | ||||
def request(self, method, uri, body=None, headers={}): | ||||
if isinstance(body, httpsendfile): | ||||
body.seek(0) | ||||
httpclient.HTTPConnection.request(self, method, uri, body=body, | ||||
headers=headers) | ||||
_configuredlogging = False | ||||
Augie Fackler
|
r14375 | LOGFMT = '%(levelname)s:%(name)s:%(lineno)d:%(message)s' | ||
Augie Fackler
|
r14244 | # Subclass BOTH of these because otherwise urllib2 "helpfully" | ||
# reinserts them since it notices we don't include any subclasses of | ||||
# them. | ||||
class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler): | ||||
def __init__(self, ui, pwmgr): | ||||
global _configuredlogging | ||||
urllib2.AbstractHTTPHandler.__init__(self) | ||||
self.ui = ui | ||||
self.pwmgr = pwmgr | ||||
self._connections = {} | ||||
loglevel = ui.config('ui', 'http2debuglevel', default=None) | ||||
if loglevel and not _configuredlogging: | ||||
_configuredlogging = True | ||||
Augie Fackler
|
r14294 | logger = logging.getLogger('mercurial.httpclient') | ||
Augie Fackler
|
r14244 | logger.setLevel(getattr(logging, loglevel.upper())) | ||
Augie Fackler
|
r14375 | handler = logging.StreamHandler() | ||
handler.setFormatter(logging.Formatter(LOGFMT)) | ||||
logger.addHandler(handler) | ||||
Augie Fackler
|
r14244 | |||
def close_all(self): | ||||
"""Close and remove all connection objects being kept for reuse.""" | ||||
for openconns in self._connections.values(): | ||||
for conn in openconns: | ||||
conn.close() | ||||
self._connections = {} | ||||
# shamelessly borrowed from urllib2.AbstractHTTPHandler | ||||
Augie Fackler
|
r14346 | def do_open(self, http_class, req, use_ssl): | ||
Augie Fackler
|
r14244 | """Return an addinfourl object for the request, using http_class. | ||
http_class must implement the HTTPConnection API from httplib. | ||||
The addinfourl return value is a file-like object. It also | ||||
has methods and attributes including: | ||||
- info(): return a mimetools.Message object for the headers | ||||
- geturl(): return the original request URL | ||||
- code: HTTP status code | ||||
""" | ||||
# If using a proxy, the host returned by get_host() is | ||||
# actually the proxy. On Python 2.6.1, the real destination | ||||
# hostname is encoded in the URI in the urllib2 request | ||||
# object. On Python 2.6.5, it's stored in the _tunnel_host | ||||
# attribute which has no accessor. | ||||
tunhost = getattr(req, '_tunnel_host', None) | ||||
host = req.get_host() | ||||
if tunhost: | ||||
proxyhost = host | ||||
host = tunhost | ||||
elif req.has_proxy(): | ||||
proxyhost = req.get_host() | ||||
host = req.get_selector().split('://', 1)[1].split('/', 1)[0] | ||||
else: | ||||
proxyhost = None | ||||
if proxyhost: | ||||
if ':' in proxyhost: | ||||
# Note: this means we'll explode if we try and use an | ||||
# IPv6 http proxy. This isn't a regression, so we | ||||
# won't worry about it for now. | ||||
proxyhost, proxyport = proxyhost.rsplit(':', 1) | ||||
else: | ||||
proxyport = 3128 # squid default | ||||
proxy = (proxyhost, proxyport) | ||||
else: | ||||
proxy = None | ||||
if not host: | ||||
raise urllib2.URLError('no host given') | ||||
Augie Fackler
|
r14346 | connkey = use_ssl, host, proxy | ||
allconns = self._connections.get(connkey, []) | ||||
Augie Fackler
|
r14244 | conns = [c for c in allconns if not c.busy()] | ||
if conns: | ||||
h = conns[0] | ||||
else: | ||||
if allconns: | ||||
self.ui.debug('all connections for %s busy, making a new ' | ||||
'one\n' % host) | ||||
timeout = None | ||||
if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: | ||||
timeout = req.timeout | ||||
h = http_class(host, timeout=timeout, proxy_hostport=proxy) | ||||
Augie Fackler
|
r14346 | self._connections.setdefault(connkey, []).append(h) | ||
Augie Fackler
|
r14244 | |||
headers = dict(req.headers) | ||||
headers.update(req.unredirected_hdrs) | ||||
headers = dict( | ||||
(name.title(), val) for name, val in headers.items()) | ||||
try: | ||||
path = req.get_selector() | ||||
if '://' in path: | ||||
path = path.split('://', 1)[1].split('/', 1)[1] | ||||
if path[0] != '/': | ||||
path = '/' + path | ||||
h.request(req.get_method(), path, req.data, headers) | ||||
r = h.getresponse() | ||||
except socket.error, err: # XXX what error? | ||||
raise urllib2.URLError(err) | ||||
# Pick apart the HTTPResponse object to get the addinfourl | ||||
# object initialized properly. | ||||
r.recv = r.read | ||||
resp = urllib.addinfourl(r, r.headers, req.get_full_url()) | ||||
resp.code = r.status | ||||
resp.msg = r.reason | ||||
return resp | ||||
# httplib always uses the given host/port as the socket connect | ||||
# target, and then allows full URIs in the request path, which it | ||||
# then observes and treats as a signal to do proxying instead. | ||||
def http_open(self, req): | ||||
if req.get_full_url().startswith('https'): | ||||
return self.https_open(req) | ||||
Augie Fackler
|
r17787 | def makehttpcon(*args, **kwargs): | ||
Matt Mackall
|
r17836 | k2 = dict(kwargs) | ||
k2['use_ssl'] = False | ||||
return HTTPConnection(*args, **k2) | ||||
Augie Fackler
|
r17787 | return self.do_open(makehttpcon, req, False) | ||
Augie Fackler
|
r14244 | |||
def https_open(self, req): | ||||
Patrick Mezard
|
r15025 | # req.get_full_url() does not contain credentials and we may | ||
# need them to match the certificates. | ||||
url = req.get_full_url() | ||||
user, password = self.pwmgr.find_stored_password(url) | ||||
res = readauthforuri(self.ui, url, user) | ||||
Augie Fackler
|
r14244 | if res: | ||
group, auth = res | ||||
self.auth = auth | ||||
self.ui.debug("using auth.%s.* for authentication\n" % group) | ||||
else: | ||||
self.auth = None | ||||
Augie Fackler
|
r14346 | return self.do_open(self._makesslconnection, req, True) | ||
Augie Fackler
|
r14244 | |||
def _makesslconnection(self, host, port=443, *args, **kwargs): | ||||
keyfile = None | ||||
certfile = None | ||||
if args: # key_file | ||||
keyfile = args.pop(0) | ||||
if args: # cert_file | ||||
certfile = args.pop(0) | ||||
# if the user has specified different key/cert files in | ||||
# hgrc, we prefer these | ||||
if self.auth and 'key' in self.auth and 'cert' in self.auth: | ||||
keyfile = self.auth['key'] | ||||
certfile = self.auth['cert'] | ||||
# let host port take precedence | ||||
if ':' in host and '[' not in host or ']:' in host: | ||||
host, port = host.rsplit(':', 1) | ||||
port = int(port) | ||||
if '[' in host: | ||||
host = host[1:-1] | ||||
Augie Fackler
|
r19809 | kwargs['keyfile'] = keyfile | ||
kwargs['certfile'] = certfile | ||||
Augie Fackler
|
r14244 | |||
kwargs.update(sslutil.sslkwargs(self.ui, host)) | ||||
con = HTTPConnection(host, port, use_ssl=True, | ||||
Augie Fackler
|
r19809 | ssl_wrap_socket=sslutil.ssl_wrap_socket, | ||
Augie Fackler
|
r14244 | ssl_validator=sslutil.validator(self.ui, host), | ||
**kwargs) | ||||
return con | ||||