##// END OF EJS Templates
merge with stable
merge with stable

File last commit:

r13902:fab10e7c merge default
r13941:924f40b9 merge default
Show More
url.py
961 lines | 33.9 KiB | text/x-python | PythonLexer
Benoit Boissinot
factor out the url handling from httprepo...
r7270 # url.py - HTTP handling for mercurial
#
# Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
# Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Benoit Boissinot
factor out the url handling from httprepo...
r7270
Brodie Rao
url: be stricter about detecting schemes...
r13848 import urllib, urllib2, httplib, os, socket, cStringIO, re
Renato Cunha
url.py: removed 'file' inheritance in the httpsendfile class...
r11880 import __builtin__
Benoit Boissinot
factor out the url handling from httprepo...
r7270 from i18n import _
import keepalive, util
Brodie Rao
url: provide url object...
class url(object):
    """Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url('c:\\\\foo\\\\bar')
    <url path: 'c:\\\\foo\\\\bar'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # characters left unquoted by __str__ in user/passwd ...
    _safechars = "!~*'()+"
    # ... and in path/query/fragment (same set plus '/')
    _safepchars = "/!~*'()+"
    _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        """Parse the string path into its URL components.

        parsequery and parsefragment control whether '?' and '#' are
        treated as separators (see the class docstring). Raises
        util.Abort for a file:// URL naming a non-local host.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        # special case for Windows drive letters
        if hasdriveletter(path):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parsefragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # no authority: what follows '//' is an absolute path
                    if path:
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise util.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # components are stored unquoted; __str__ quotes them again
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urllib.unquote(v))

    def __repr__(self):
        """Return '<url name: value, ...>' listing the components set."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        """Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        """
        if self._localpath:
            # local paths (and bundle: paths) are emitted unquoted
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            self.scheme and not self.path):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # bracketed IPv6 literals must not have their brackets quoted
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url-without-credentials, authinfo).

        authinfo is None when no user is set, otherwise the tuple
        (None, (full-url, host), user, passwd-or-'').
        """
        user, passwd = self.user, self.passwd
        try:
            # temporarily drop the credentials to render the bare URL
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        return (s, (None, (str(self), self.host),
                    self.user, self.passwd or ''))

    def localpath(self):
        """Return the local filesystem path this URL refers to.

        For file: and bundle: URLs the path component is returned
        (adjusted for drive letters); for anything else the original
        string passed to the constructor is returned unchanged.
        """
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif self.host is not None and self.path:
                path = '/' + path
            # We also need to handle the case of file:///C:/, which
            # should return C:/, not /C:/. The drive letter follows the
            # leading slash, so that is where we have to look for it;
            # a path that already starts with a drive letter is kept.
            elif path.startswith('/') and hasdriveletter(path[1:]):
                # Strip leading slash from paths with drive names
                return path[1:]
            return path
        return self._origpath
Matt Mackall
url: nuke some newly-introduced underbars in identifiers
def hasscheme(path):
    """Return True if path parses as a URL with a non-empty scheme."""
    return bool(url(path).scheme)
Matt Mackall
url: nuke some newly-introduced underbars in identifiers
def hasdriveletter(path):
    """Return True if path starts with a letter followed by ':'.

    Slices are used instead of indexing so that empty and
    one-character strings are handled without raising.
    """
    return path[1:2] == ':' and path[0:1].isalpha()
Brodie Rao
url: refactor util.drop_scheme() and hg.localpath() into url.localpath()...
def localpath(path):
    """Return the local path for path, which may be a file:/bundle: URL.

    '?' and '#' are not treated as separators, so they stay part of
    the path.
    """
    return url(path, parsequery=False, parsefragment=False).localpath()
Brodie Rao
url: refactor util.drop_scheme() and hg.localpath() into url.localpath()...
r13826
Brodie Rao
url: use url.url in hidepassword() and removeauth()
def hidepassword(u):
    '''hide user credential in a url string

    Any password in u is replaced by '***'; the user name is kept.
    '''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return str(u)
Benoit Boissinot
factor out the url handling from httprepo...
r7270
Brodie Rao
url: use url.url in hidepassword() and removeauth()
def removeauth(u):
    '''remove all authentication information from a url string

    Both the user name and the password are dropped.
    '''
    u = url(u)
    u.user = u.passwd = None
    return str(u)
Benoit Boissinot
factor out the url handling from httprepo...
r7270
def netlocsplit(netloc):
    '''split [user[:passwd]@]host[:port] into 4-tuple.

    Returns (host, port, user, passwd); components that are absent
    are None. user and passwd are returned URL-unquoted, host and
    port as found.
    '''
    # Split on the last '@', like the url class does, so that an '@'
    # inside the credentials does not end up in the host.
    a = netloc.rfind('@')
    if a == -1:
        user, passwd = None, None
    else:
        userpass, netloc = netloc[:a], netloc[a + 1:]
        c = userpass.find(':')
        if c == -1:
            user, passwd = urllib.unquote(userpass), None
        else:
            user = urllib.unquote(userpass[:c])
            passwd = urllib.unquote(userpass[c + 1:])
    # The colons inside a bracketed IPv6 literal are part of the host;
    # only look for the port separator after the closing bracket.
    start = 0
    if netloc.startswith('['):
        start = netloc.find(']') + 1
    c = netloc.find(':', start)
    if c == -1:
        host, port = netloc, None
    else:
        host, port = netloc[:c], netloc[c + 1:]
    return host, port, user, passwd
def netlocunsplit(host, port, user=None, passwd=None):
    '''turn host, port, user, passwd into [user[:passwd]@]host[:port].

    port is expected to be a string (or empty/None for no port).
    user and passwd are fully URL-quoted, including '/'. passwd is
    only used when user is given.
    '''
    if port:
        hostport = host + ':' + port
    else:
        hostport = host
    if user:
        # safe='' so that '/' in credentials is quoted as well
        quote = lambda s: urllib.quote(s, safe='')
        if passwd:
            userpass = quote(user) + ':' + quote(passwd)
        else:
            userpass = quote(user)
        return userpass + '@' + hostport
    return hostport
Steve Borho
url: move [auth] parsing out into a utility function...
def readauthforuri(ui, uri):
    """Return the best matching [auth] group for uri, or None.

    The [auth] section is read from ui as '<group>.<setting>' keys.
    A group matches when its prefix is '*' or a prefix of the part of
    uri after '://', and the scheme of uri is one of the group's
    schemes (taken from the prefix itself if it contains '://', else
    from the 'schemes' setting, defaulting to 'https'). The group
    with the longest matching prefix wins.

    Returns a (group name, settings dict) pair, or None if no group
    matches or uri has no '://' separator.
    """
    # Read configuration
    config = dict()
    for key, val in ui.configitems('auth'):
        if '.' not in key:
            ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
            continue
        group, setting = key.rsplit('.', 1)
        gdict = config.setdefault(group, dict())
        if setting in ('username', 'cert', 'key'):
            val = util.expandpath(val)
        gdict[setting] = val

    # Find the best match
    if '://' not in uri:
        # nothing to compare scheme/prefix against
        return None
    scheme, hostpath = uri.split('://', 1)
    bestlen = 0
    bestauth = None
    for group, auth in config.items():
        prefix = auth.get('prefix')
        if not prefix:
            continue
        p = prefix.split('://', 1)
        if len(p) > 1:
            schemes, prefix = [p[0]], p[1]
        else:
            schemes = (auth.get('schemes') or 'https').split()
        if (prefix == '*' or hostpath.startswith(prefix)) and \
            len(prefix) > bestlen and scheme in schemes:
            bestlen = len(prefix)
            bestauth = group, auth
    return bestauth
Benoit Boissinot
factor out the url handling from httprepo...
# characters quotepath() never quotes
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
# lookup sets, built lazily on the first quotepath() call
_safeset = None
_hex = None

def quotepath(path):
    '''quote the path part of a URL

    This is similar to urllib.quote, but it also tries to avoid
    quoting things twice (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%20')
    'abc%20def%20'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'
    '''
    global _safeset, _hex
    if _safeset is None:
        _safeset = set(_safe)
        _hex = set('abcdefABCDEF0123456789')
    l = list(path)
    # Only the current element is ever replaced, and the look-ahead
    # reads elements that have not been visited yet, so rewriting in
    # place while iterating is safe.
    for i, c in enumerate(l):
        if (c == '%' and i + 2 < len(l) and
            l[i + 1] in _hex and l[i + 2] in _hex):
            # already a %XX escape: leave it alone
            pass
        elif c not in _safeset:
            l[i] = '%%%02X' % ord(c)
    return ''.join(l)
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    """Password manager that falls back to [auth] config and prompting.

    Credentials are looked up in the base password manager first, then
    in the [auth] configuration, and finally requested from the user
    through ui when it is interactive.
    """

    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        """Return (user, passwd) for realm/authuri.

        Raises util.Abort if credentials are missing and ui is not
        interactive. Credentials obtained from the configuration or
        the user are stored with add_password() before returning.
        """
        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
            self, realm, authuri)
        user, passwd = authinfo
        if user and passwd:
            self._writedebug(user, passwd)
            return (user, passwd)

        if not user:
            res = readauthforuri(self.ui, authuri)
            if res:
                group, auth = res
                user, passwd = auth.get('username'), auth.get('password')
                self.ui.debug("using auth.%s.* for authentication\n" % group)
        if not user or not passwd:
            if not self.ui.interactive():
                raise util.Abort(_('http authorization required'))

            self.ui.write(_("http authorization required\n"))
            self.ui.write(_("realm: %s\n") % realm)
            if user:
                self.ui.write(_("user: %s\n") % user)
            else:
                user = self.ui.prompt(_("user:"), default=None)

            if not passwd:
                passwd = self.ui.getpass()

        self.add_password(realm, authuri, user, passwd)
        self._writedebug(user, passwd)
        return (user, passwd)

    def _writedebug(self, user, passwd):
        """Log the user name and a masked password via ui.debug."""
        msg = _('http auth: user %s, password %s\n')
        # the password is never logged, only one '*' per character
        self.ui.debug(msg % (user, passwd and '*' * len(passwd) or 'not set'))
Benoit Boissinot
factor out the url handling from httprepo...
class proxyhandler(urllib2.ProxyHandler):
    """ProxyHandler configured from the http_proxy config/environment.

    The proxy URL comes from the http_proxy.host setting or the
    http_proxy environment variable. Hosts listed in http_proxy.no or
    the no_proxy environment variable (plus localhost and 127.0.0.1)
    are not proxied unless http_proxy.always is set.
    """

    def __init__(self, ui):
        proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
        # XXX proxyauthinfo = None

        if proxyurl:
            # proxy can be proper url or host[:port]
            if not (proxyurl.startswith('http:') or
                    proxyurl.startswith('https:')):
                proxyurl = 'http://' + proxyurl + '/'
            proxy = url(proxyurl)
            if not proxy.user:
                proxy.user = ui.config("http_proxy", "user")
                proxy.passwd = ui.config("http_proxy", "passwd")

            # see if we should use a proxy for this url
            no_list = ["localhost", "127.0.0.1"]
            no_list.extend([p.lower() for
                            p in ui.configlist("http_proxy", "no")])
            no_list.extend([p.strip().lower() for
                            p in os.getenv("no_proxy", '').split(',')
                            if p.strip()])
            # "http_proxy.always" config is for running tests on localhost
            if ui.configbool("http_proxy", "always"):
                self.no_list = []
            else:
                self.no_list = no_list

            proxyurl = str(proxy)
            proxies = {'http': proxyurl, 'https': proxyurl}
            ui.debug('proxying through http://%s:%s\n' %
                      (proxy.host, proxy.port))
        else:
            proxies = {}
            # keep the attribute defined so proxy_open() never fails on it
            self.no_list = []

        # urllib2 takes proxy values from the environment and those
        # will take precedence if found, so drop them
        for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
            try:
                if env in os.environ:
                    del os.environ[env]
            except OSError:
                pass

        urllib2.ProxyHandler.__init__(self, proxies)
        self.ui = ui

    def proxy_open(self, req, proxy, type_):
        """Open req through proxy unless its host is excluded.

        Returns None (meaning "not handled here") for hosts in
        self.no_list.
        """
        # no_list entries are lower-cased, so compare case-insensitively
        host = req.get_host().split(':')[0].lower()
        if host in self.no_list:
            return None

        # work around a bug in Python < 2.4.2
        # (it leaves a "\n" at the end of Proxy-authorization headers)
        baseclass = req.__class__
        class _request(baseclass):
            def add_header(self, key, val):
                if key.lower() == 'proxy-authorization':
                    val = val.strip()
                return baseclass.add_header(self, key, val)
        req.__class__ = _request

        return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
Renato Cunha
url.py: removed 'file' inheritance in the httpsendfile class...
class httpsendfile(object):
    """This is a wrapper around the objects returned by python's "open".

    Its purpose is to send file-like objects via HTTP and, to do so, it
    defines a __len__ attribute to feed the Content-Length header.
    """

    def __init__(self, ui, *args, **kwargs):
        """Open a file with the given open() arguments.

        ui is used to report upload progress from read().
        """
        # We can't just "self._data = open(*args, **kwargs)" here because there
        # is an "open" function defined in this module that shadows the global
        # one
        self.ui = ui
        self._data = __builtin__.open(*args, **kwargs)
        self.seek = self._data.seek
        self.close = self._data.close
        self.write = self._data.write
        self._len = os.fstat(self._data.fileno()).st_size
        self._pos = 0
        # progress total in kb, doubled (see the comment in read())
        self._total = len(self) // 1024 * 2

    def read(self, *args, **kwargs):
        """Read from the wrapped file and report progress."""
        try:
            ret = self._data.read(*args, **kwargs)
        except EOFError:
            # close the progress topic, then let the error propagate;
            # there is no data to account for or return
            self.ui.progress(_('sending'), None)
            raise
        self._pos += len(ret)
        # We pass double the max for total because we currently have
        # to send the bundle twice in the case of a server that
        # requires authentication. Since we can't know until we try
        # once whether authentication will be required, just lie to
        # the user and maybe the push succeeds suddenly at 50%.
        self.ui.progress(_('sending'), self._pos // 1024,
                         unit=_('kb'), total=self._total)
        return ret

    def __len__(self):
        """Return the file size recorded when the file was opened."""
        return self._len
Benoit Boissinot
factor out the url handling from httprepo...
r7270
Mads Kiilerich
url: refactor _gen_sendfile
def _gen_sendfile(orgsend):
    """Return a send() method that can stream httpsendfile objects.

    orgsend is the original unbound send method. The returned method
    sends an httpsendfile in chunks (rewinding it first) and passes
    any other data straight to orgsend.
    """
    def _sendfile(self, data):
        # send a file
        if isinstance(data, httpsendfile):
            # if auth required, some data sent twice, so rewind here
            data.seek(0)
            for chunk in util.filechunkiter(data):
                orgsend(self, chunk)
        else:
            orgsend(self, data)
    return _sendfile
Henrik Stuart
url: use CONNECT for HTTPS connections through HTTP proxy (issue967)...
has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
    try:
        # avoid using deprecated/broken FakeSocket in python 2.6
        import ssl
        _ssl_wrap_socket = ssl.wrap_socket
        CERT_REQUIRED = ssl.CERT_REQUIRED
    except ImportError:
        CERT_REQUIRED = 2

        def _ssl_wrap_socket(sock, key_file, cert_file,
                             cert_reqs=CERT_REQUIRED, ca_certs=None):
            # fallback for Pythons without the ssl module: no
            # certificate verification is available
            if ca_certs:
                raise util.Abort(_(
                    'certificate checking requires Python 2.6'))

            ssl = socket.ssl(sock, key_file, cert_file)
            return httplib.FakeSocket(sock, ssl)

    try:
        _create_connection = socket.create_connection
    except AttributeError:
        # private sentinel meaning "no timeout argument was given"
        _GLOBAL_DEFAULT_TIMEOUT = object()

        def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT,
                               source_address=None):
            # lifted from Python 2.6
            #
            # Try every address returned by getaddrinfo in turn and
            # return the first socket that connects; if none does,
            # raise the last socket.error seen.
            import sys
            err = socket.error("getaddrinfo returns an empty list")
            host, port = address
            for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    if source_address:
                        sock.bind(source_address)
                    sock.connect(sa)
                    return sock
                except socket.error:
                    # sys.exc_info() instead of binding the exception
                    # in the except clause keeps this valid on every
                    # Python version
                    err = sys.exc_info()[1]
                    if sock is not None:
                        sock.close()

            raise err
Benoit Boissinot
factor out the url handling from httprepo...
class httpconnection(keepalive.HTTPConnection):
    """HTTP connection that streams files and can tunnel via CONNECT."""

    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection.send)

    def connect(self):
        """Connect, setting up a CONNECT tunnel if realhostport is set."""
        if has_https and self.realhostport: # use CONNECT proxy
            self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.sock.connect((self.host, self.port))
            if _generic_proxytunnel(self):
                # we do not support client x509 certificates
                self.sock = _ssl_wrap_socket(self.sock, None, None)
        else:
            keepalive.HTTPConnection.connect(self)

    def getresponse(self):
        """Return a pending proxy response, else the regular response."""
        proxyres = getattr(self, 'proxyres', None)
        if proxyres:
            if proxyres.will_close:
                self.close()
            # hand the stored response out only once
            self.proxyres = None
            return proxyres
        return keepalive.HTTPConnection.getresponse(self)
Henrik Stuart
url: generalise HTTPS proxy handling to accomodate Python changes...
r9852 # general transaction handler to support different ways to handle
# HTTPS proxying before and after Python 2.6.3.
def _generic_start_transaction(handler, h, req):
if hasattr(req, '_tunnel_host') and req._tunnel_host:
tunnel_host = req._tunnel_host
if tunnel_host[:7] not in ['http://', 'https:/']:
tunnel_host = 'https://' + tunnel_host
new_tunnel = True
else:
tunnel_host = req.get_selector()
new_tunnel = False
if new_tunnel or tunnel_host == req.get_full_url(): # has proxy
Brodie Rao
url: use url.url in proxyhandler
r13820 u = url(tunnel_host)
if new_tunnel or u.scheme == 'https': # only use CONNECT for HTTPS
h.realhostport = ':'.join([u.host, (u.port or '443')])
Henrik Stuart
url: generalise HTTPS proxy handling to accomodate Python changes...
r9852 h.headers = req.headers.copy()
h.headers.update(handler.parent.addheaders)
return
Benoit Boissinot
url: proxy handling, simplify and correctly deal with IPv6...
r10415 h.realhostport = None
Henrik Stuart
url: generalise HTTPS proxy handling to accomodate Python changes...
r9852 h.headers = None
def _generic_proxytunnel(self):
    """Ask the proxy to open a CONNECT tunnel to self.realhostport.

    self is a connection object whose socket is already connected to
    the proxy. Returns True if the proxy answered 200 (the tunnel is
    established). Otherwise returns False; for replies other than
    HTTP/0.9 the parsed proxy response is stored in self.proxyres.
    Raises httplib.UnknownProtocol for an unrecognized HTTP version.
    """
    proxyheaders = dict(
            [(x, self.headers[x]) for x in self.headers
             if x.lower().startswith('proxy-')])
    self._set_hostport(self.host, self.port)
    self.send('CONNECT %s HTTP/1.0\r\n' % self.realhostport)
    for header in proxyheaders.items():
        self.send('%s: %s\r\n' % header)
    self.send('\r\n')

    # majority of the following code is duplicated from
    # httplib.HTTPConnection as there are no adequate places to
    # override functions to provide the needed functionality
    res = self.response_class(self.sock,
                              strict=self.strict,
                              method=self._method)

    while True:
        version, status, reason = res._read_status()
        if status != httplib.CONTINUE:
            break
        # skip the header from the 100 response
        while True:
            skip = res.fp.readline().strip()
            if not skip:
                break
    res.status = status
    res.reason = reason.strip()

    if res.status == 200:
        # Discard the rest of the proxy's reply headers. Also stop on
        # a bare newline and on EOF (readline() returns ''), otherwise
        # a proxy that closes the connection would make this spin
        # forever.
        while True:
            line = res.fp.readline()
            if not line or line in ('\r\n', '\n'):
                break
        return True

    if version == 'HTTP/1.0':
        res.version = 10
    elif version.startswith('HTTP/1.'):
        res.version = 11
    elif version == 'HTTP/0.9':
        res.version = 9
    else:
        raise httplib.UnknownProtocol(version)

    if res.version == 9:
        res.length = None
        res.chunked = 0
        res.will_close = 1
        res.msg = httplib.HTTPMessage(cStringIO.StringIO())
        return False

    res.msg = httplib.HTTPMessage(res.fp)
    res.msg.fp = None

    # are we using the chunked-style of transfer encoding?
    trenc = res.msg.getheader('transfer-encoding')
    if trenc and trenc.lower() == "chunked":
        res.chunked = 1
        res.chunk_left = None
    else:
        res.chunked = 0

    # will the connection close at the end of the response?
    res.will_close = res._check_close()

    # do we have a Content-Length?
    # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
    length = res.msg.getheader('content-length')
    if length and not res.chunked:
        try:
            res.length = int(length)
        except ValueError:
            res.length = None
        else:
            if res.length < 0:  # ignore nonsensical negative lengths
                res.length = None
    else:
        res.length = None

    # does the body have a fixed length? (of zero)
    if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
        100 <= status < 200 or # 1xx codes
        res._method == 'HEAD'):
        res.length = 0

    # if the connection remains open, and we aren't using chunked, and
    # a content-length was not provided, then assume that the connection
    # WILL close.
    if (not res.will_close and
       not res.chunked and
       res.length is None):
        res.will_close = 1

    self.proxyres = res

    return False
Benoit Boissinot
factor out the url handling from httprepo...
class httphandler(keepalive.HTTPHandler):
    """HTTP handler that opens requests through httpconnection."""

    def http_open(self, req):
        return self.do_open(httpconnection, req)

    def _start_transaction(self, h, req):
        # set up CONNECT tunnelling state on h before the base class
        # starts the transaction
        _generic_start_transaction(self, h, req)
        return keepalive.HTTPHandler._start_transaction(self, h, req)
Mads Kiilerich
url: verify correctness of https server certificates (issue2407)...
r12592 def _verifycert(cert, hostname):
Mads Kiilerich
url: validity (notBefore/notAfter) is checked by OpenSSL (issue2407)...
r12742 '''Verify that cert (in socket.getpeercert() format) matches hostname.
Yuya Nishihara
url: check subjectAltName when verifying ssl certificate...
r13249 CRLs is not handled.
Martin Geisler
check-code: find trailing whitespace
r12770
Mads Kiilerich
url: verify correctness of https server certificates (issue2407)...
r12592 Returns error message if any problems are found and None on success.
'''
if not cert:
return _('no certificate received')
dnsname = hostname.lower()
Yuya Nishihara
url: check subjectAltName when verifying ssl certificate...
r13249 def matchdnsname(certname):
return (certname == dnsname or
'.' in dnsname and certname == '*.' + dnsname.split('.', 1)[1])
san = cert.get('subjectAltName', [])
if san:
certnames = [value.lower() for key, value in san if key == 'DNS']
for name in certnames:
if matchdnsname(name):
return None
return _('certificate is for %s') % ', '.join(certnames)
# subject is only checked when subjectAltName is empty
Mads Kiilerich
url: verify correctness of https server certificates (issue2407)...
r12592 for s in cert.get('subject', []):
key, value = s[0]
if key == 'commonName':
Yuya Nishihara
url: fix UnicodeDecodeError on certificate verification error...
r13248 try:
# 'subject' entries are unicode
certname = value.lower().encode('ascii')
except UnicodeEncodeError:
return _('IDN in certificate not supported')
Yuya Nishihara
url: check subjectAltName when verifying ssl certificate...
r13249 if matchdnsname(certname):
Mads Kiilerich
url: verify correctness of https server certificates (issue2407)...
r12592 return None
return _('certificate is for %s') % certname
Yuya Nishihara
url: check subjectAltName when verifying ssl certificate...
r13249 return _('no commonName or subjectAltName found in certificate')
Mads Kiilerich
url: verify correctness of https server certificates (issue2407)...
r12592
Benoit Boissinot
factor out the url handling from httprepo...
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        '''HTTPS connection that checks the server certificate on connect.

        Expects a ``ui`` attribute to be assigned before connect() is
        called (httpshandler._makeconnection does this) and reads
        ``realhostport`` to decide whether to tunnel through a proxy.
        NOTE(review): realhostport is set outside this block - confirm it
        is always initialised before connect() runs.
        '''
        response_class = keepalive.HTTPResponse
        # must be able to send big bundle as stream.
        send = _gen_sendfile(keepalive.safesend)
        getresponse = keepalive.wrapgetresponse(httplib.HTTPSConnection)

        def connect(self):
            '''Open the socket, wrap it in SSL and validate the peer.

            With web.cacerts configured and no hostfingerprints entry for
            the host, the certificate is required, and checked against the
            host name with _verifycert. Otherwise the socket is wrapped
            without CA validation and the SHA-1 fingerprint of the peer
            certificate is compared with the configured fingerprint, if
            any; when nothing can be checked a warning is printed.

            Raises util.Abort when the cacerts file is missing, when the
            certificate does not match the host, or when a configured
            fingerprint does not match (or cannot be checked).
            '''
            self.sock = _create_connection((self.host, self.port))

            host = self.host
            if self.realhostport: # use CONNECT proxy
                # the return value is not needed here; the call sets up
                # the tunnel on this connection
                _generic_proxytunnel(self)
                # verify against the tunnelled host, not the proxy
                host = self.realhostport.rsplit(':', 1)[0]

            cacerts = self.ui.config('web', 'cacerts')
            hostfingerprint = self.ui.config('hostfingerprints', host)

            if cacerts and not hostfingerprint:
                cacerts = util.expandpath(cacerts)
                if not os.path.exists(cacerts):
                    raise util.Abort(_('could not find '
                                       'web.cacerts: %s') % cacerts)
                self.sock = _ssl_wrap_socket(self.sock, self.key_file,
                                             self.cert_file,
                                             cert_reqs=CERT_REQUIRED,
                                             ca_certs=cacerts)
                msg = _verifycert(self.sock.getpeercert(), host)
                if msg:
                    raise util.Abort(_('%s certificate error: %s '
                                       '(use --insecure to connect '
                                       'insecurely)') % (host, msg))
                self.ui.debug('%s certificate successfully verified\n' % host)
            else:
                self.sock = _ssl_wrap_socket(self.sock, self.key_file,
                                             self.cert_file)
                if hasattr(self.sock, 'getpeercert'):
                    # getpeercert(True) yields the binary form of the
                    # certificate, which is what gets hashed
                    peercert = self.sock.getpeercert(True)
                    peerfingerprint = util.sha1(peercert).hexdigest()
                    nicefingerprint = ":".join(
                        [peerfingerprint[x:x + 2]
                         for x in xrange(0, len(peerfingerprint), 2)])
                    if hostfingerprint:
                        # configured value may be written with or without
                        # ':' separators and in either case
                        if peerfingerprint.lower() != \
                                hostfingerprint.replace(':', '').lower():
                            raise util.Abort(_('invalid certificate for %s '
                                               'with fingerprint %s') %
                                             (host, nicefingerprint))
                        self.ui.debug('%s certificate matched fingerprint '
                                      '%s\n' % (host, nicefingerprint))
                    else:
                        self.ui.warn(_('warning: %s certificate '
                                       'with fingerprint %s not verified '
                                       '(check hostfingerprints or web.cacerts '
                                       'config setting)\n') %
                                     (host, nicefingerprint))
                else: # python 2.5 ?
                    if hostfingerprint:
                        raise util.Abort(_('no certificate for %s with '
                                           'configured hostfingerprint') % host)
                    self.ui.warn(_('warning: %s certificate not verified '
                                   '(check web.cacerts config setting)\n') %
                                 host)

    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        '''Keepalive HTTPS handler creating httpsconnection objects.'''

        def __init__(self, ui):
            keepalive.KeepAliveHandler.__init__(self)
            urllib2.HTTPSHandler.__init__(self)
            self.ui = ui
            self.pwmgr = passwordmgr(self.ui)

        def _start_transaction(self, h, req):
            '''Run the shared proxy-aware setup, then the keepalive one.'''
            _generic_start_transaction(self, h, req)
            return keepalive.KeepAliveHandler._start_transaction(self, h, req)

        def https_open(self, req):
            '''Look up auth settings for the request URL and open it.

            The matched auth entry (or None) is stored on self.auth so
            that _makeconnection can pick up key/cert files from it.
            '''
            res = readauthforuri(self.ui, req.get_full_url())
            if res:
                group, auth = res
                self.auth = auth
                self.ui.debug("using auth.%s.* for authentication\n" % group)
            else:
                self.auth = None
            return self.do_open(self._makeconnection, req)

        def _makeconnection(self, host, port=None, *args, **kwargs):
            '''Build an httpsconnection for host/port.

            The first two extra positional arguments, when given, are the
            key file and certificate file; any remaining ones are passed
            through unchanged. The new connection gets this handler's ui.
            '''
            keyfile = None
            certfile = None

            if len(args) >= 1: # key_file
                keyfile = args[0]
            if len(args) >= 2: # cert_file
                certfile = args[1]
            args = args[2:]

            # if the user has specified different key/cert files in
            # hgrc, we prefer these
            if self.auth and 'key' in self.auth and 'cert' in self.auth:
                keyfile = self.auth['key']
                certfile = self.auth['cert']

            conn = httpsconnection(host, port, keyfile, certfile, *args,
                                   **kwargs)
            conn.ui = self.ui
            return conn
Benoit Boissinot
factor out the url handling from httprepo...
r7270
class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
Mads Kiilerich
http digest auth: reset redirect counter on new requests (issue2255)...
r11457 def __init__(self, *args, **kwargs):
urllib2.HTTPDigestAuthHandler.__init__(self, *args, **kwargs)
self.retried_req = None
def reset_retry_count(self):
# Python 2.6.5 will call this on 401 or 407 errors and thus loop
# forever. We disable reset_retry_count completely and reset in
# http_error_auth_reqed instead.
pass
Benoit Boissinot
factor out the url handling from httprepo...
r7270 def http_error_auth_reqed(self, auth_header, host, req, headers):
Mads Kiilerich
http digest auth: reset redirect counter on new requests (issue2255)...
r11457 # Reset the retry counter once for each request.
if req is not self.retried_req:
self.retried_req = req
self.retried = 0
# In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
# it doesn't know about the auth type requested. This can happen if
# somebody is using BasicAuth and types a bad password.
Benoit Boissinot
factor out the url handling from httprepo...
r7270 try:
return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
self, auth_header, host, req, headers)
except ValueError, inst:
arg = inst.args[0]
if arg.startswith("AbstractDigestAuthHandler doesn't know "):
return
raise
Wagner Bruna
http basic auth: reset redirect counter on new requests (issue2255)...
class httpbasicauthhandler(urllib2.HTTPBasicAuthHandler):
    '''Basic auth handler whose retry counter is reset per request.'''

    def __init__(self, *args, **kwargs):
        urllib2.HTTPBasicAuthHandler.__init__(self, *args, **kwargs)
        # last request for which the retry counter was reset
        self.retried_req = None

    def reset_retry_count(self):
        # Python 2.6.5 will call this on 401 or 407 errors and thus loop
        # forever. We disable reset_retry_count completely and reset in
        # http_error_auth_reqed instead.
        pass

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        '''Reset the retry counter for a new request, then delegate.'''
        # Reset the retry counter once for each request.
        if req is not self.retried_req:
            self.retried_req = req
            self.retried = 0
        base = urllib2.HTTPBasicAuthHandler
        return base.http_error_auth_reqed(self, auth_header, host, req,
                                          headers)
Henrik Stuart
url: add support for custom handlers in extensions
# Factories for additional urllib2 handlers. opener() calls each entry as
# h(ui, passmgr) and installs the returned handler.
handlerfuncs = []
Benoit Boissinot
factor out the url handling from httprepo...
def opener(ui, authinfo=None):
    '''
    construct an opener suitable for urllib2
    authinfo will be added to the password manager

    Handlers are installed in this order: http, https (only when
    has_https is true), proxy, basic auth, digest auth, then one handler
    per entry of handlerfuncs.
    '''
    handlers = [httphandler()]
    if has_https:
        handlers.append(httpshandler(ui))
    handlers.append(proxyhandler(ui))

    passmgr = passwordmgr(ui)
    if authinfo is not None:
        passmgr.add_password(*authinfo)
        user, passwd = authinfo[2:4]
        # never print the password itself, only a mask of equal length
        if passwd:
            shown = '*' * len(passwd)
        else:
            shown = 'not set'
        ui.debug('http auth: user %s, password %s\n' % (user, shown))

    handlers.append(httpbasicauthhandler(passmgr))
    handlers.append(httpdigestauthhandler(passmgr))
    for makehandler in handlerfuncs:
        handlers.append(makehandler(ui, passmgr))

    o = urllib2.build_opener(*handlers)
    o.addheaders = [
        # 1.0 here is the _protocol_ version
        ('User-agent', 'mercurial/proto-1.0'),
        ('Accept', 'application/mercurial-0.1'),
    ]
    return o
Brodie Rao
url: use url.url in url.open()
def open(ui, url_, data=None):
    '''Open url_ (a URL or a local path) and return the opener's result.

    A string without a scheme is treated as a local path and turned into
    an absolute file:// URL. Otherwise the scheme is lowercased and the
    URL and authentication info come from the parsed url's authinfo().
    data is passed through to the opener unchanged.
    '''
    u = url(url_)
    if not u.scheme:
        # plain path: make it absolute and express it as a file:// URL
        path = util.normpath(os.path.abspath(url_))
        url_ = 'file://' + urllib.pathname2url(path)
        authinfo = None
    else:
        u.scheme = u.scheme.lower()
        url_, authinfo = u.authinfo()
    return opener(ui, authinfo).open(url_, data)