url.py
961 lines
| 33.9 KiB
| text/x-python
|
PythonLexer
/ mercurial / url.py
Benoit Boissinot
|
r7270 | # url.py - HTTP handling for mercurial | ||
# | ||||
# Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com> | ||||
# Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br> | ||||
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> | ||||
# | ||||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Benoit Boissinot
|
r7270 | |||
Brodie Rao
|
r13848 | import urllib, urllib2, httplib, os, socket, cStringIO, re | ||
Renato Cunha
|
r11880 | import __builtin__ | ||
Benoit Boissinot
|
r7270 | from i18n import _ | ||
import keepalive, util | ||||
Brodie Rao
|
class url(object):
    """Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url('c:\\\\foo\\\\bar')
    <url path: 'c:\\\\foo\\\\bar'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # characters left unescaped by __str__ in user/passwd ...
    _safechars = "!~*'()+"
    # ... and in path/query/fragment (same set plus '/')
    _safepchars = "/!~*'()+"
    # matches a leading "<scheme>:" made of letters, digits, '+', '.', '-'
    _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        """Parse the string path into its URL components.

        parsequery and parsefragment control whether '?' and '#' are
        treated as separators (see the class docstring). Raises
        util.Abort for a file:// URL naming a non-local host.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        # stays True unless a scheme is recognized below
        self._localpath = True
        # original "host:port" text, kept for localpath()'s drive check
        self._hostport = ''
        # untouched input, returned by localpath() for non-file schemes
        self._origpath = path

        # special case for Windows drive letters
        if hasdriveletter(path):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parsefragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            if self._localpath:
                # local paths are stored as-is: no query/host parsing
                # and no unquoting is applied to them
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                    # NOTE(review): self-assignment, has no effect
                    path = path
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # empty authority ("scheme:///x"): keep the path
                    # absolute by restoring its leading slash
                    if path:
                        path = '/' + path

            if self.host and '@' in self.host:
                # split at the last '@' so the user part may contain '@'
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise util.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # percent-decode every parsed component (scheme is left alone)
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urllib.unquote(v))

    def __repr__(self):
        """Return '<url name: value, ...>' listing the non-None parts."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        """Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        """
        if self._localpath:
            # local paths (and bundle: URLs) are emitted without quoting
            # NOTE(review): self.path can be None here for an input
            # such as '#frag'; the concatenations below would then fail
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            self.scheme and not self.path):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # bracketed (IPv6) hosts are emitted verbatim, others quoted
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        # an empty fragment ('') still produces a trailing '#'
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url-without-credentials, authdata).

        authdata is None when no user is set; otherwise it is the tuple
        (None, (full-url, host), user, passwd-or-'').
        """
        user, passwd = self.user, self.passwd
        try:
            # temporarily blank the credentials to render the bare URL
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        return (s, (None, (str(self), self.host),
                    self.user, self.passwd or ''))

    def localpath(self):
        """Return a filesystem path for file: and bundle: URLs.

        For any other scheme the original, unparsed input is returned.
        """
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                # NOTE(review): self.path may be None here (no path after
                # the authority); confirm callers never hit that case
                path = self._hostport + '/' + self.path
            elif self.host is not None and self.path:
                path = '/' + path
            # We also need to handle the case of file:///C:/, which
            # should return C:/, not /C:/.
            elif hasdriveletter(path):
                # Strip leading slash from paths with drive names
                return path[1:]
            return path
        return self._origpath
Matt Mackall
|
def hasscheme(path):
    '''return True if path parses as a URL with a non-empty scheme'''
    scheme = url(path).scheme
    if scheme:
        return True
    return False
Matt Mackall
|
def hasdriveletter(path):
    '''return True if path starts with a letter followed by a colon'''
    # slices (not indexing) so that short or empty strings are safe
    first, second = path[0:1], path[1:2]
    return second == ':' and first.isalpha()
Brodie Rao
|
def localpath(path):
    '''return the local filesystem path that the string path refers to

    '?' and '#' are not treated as separators, so they stay part of
    the path.
    '''
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
Brodie Rao
|
r13826 | |||
Brodie Rao
|
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    # only mask a password that is actually present
    if parsed.passwd:
        parsed.passwd = '***'
    return str(parsed)
Benoit Boissinot
|
r7270 | |||
Brodie Rao
|
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
Benoit Boissinot
|
r7270 | |||
def netlocsplit(netloc):
    '''split [user[:passwd]@]host[:port] into 4-tuple.

    Returns (host, port, user, passwd). Components that are absent are
    None. user and passwd are percent-decoded; host and port are
    returned as written.

    The user information ends at the last '@' (a host cannot contain
    one), and a bracketed IPv6 literal such as '[::1]' is kept whole
    instead of being cut at its first colon.
    '''
    user = passwd = None
    a = netloc.rfind('@')
    if a != -1:
        userpass, netloc = netloc[:a], netloc[a + 1:]
        c = userpass.find(':')
        if c == -1:
            user = urllib.unquote(userpass)
        else:
            user = urllib.unquote(userpass[:c])
            passwd = urllib.unquote(userpass[c + 1:])

    # For '[v6addr]:port' start looking for the port separator after the
    # closing bracket. find() yields -1 when there is no ']', so start
    # falls back to 0, i.e. the plain "first colon" rule.
    start = 0
    if netloc.startswith('['):
        start = netloc.find(']') + 1
    c = netloc.find(':', start)
    if c == -1:
        host, port = netloc, None
    else:
        host, port = netloc[:c], netloc[c + 1:]
    return host, port, user, passwd
def netlocunsplit(host, port, user=None, passwd=None):
    '''turn host, port, user, passwd into [user[:passwd]@]host[:port].'''
    hostport = host
    if port:
        hostport = host + ':' + port
    # without a user no credentials are emitted, even if passwd is set
    if not user:
        return hostport
    # safe='' so that '/', ':' and '@' inside credentials get escaped
    userpass = urllib.quote(user, safe='')
    if passwd:
        userpass = userpass + ':' + urllib.quote(passwd, safe='')
    return userpass + '@' + hostport
Steve Borho
|
def readauthforuri(ui, uri):
    '''find the [auth] configuration group that best matches uri

    Every "<group>.<setting>" item of the [auth] section is collected
    per group. A group matches when its prefix is '*' or a prefix of
    the scheme-less uri, and the uri scheme is one of the group's
    schemes (taken from a "scheme://" part of the prefix, else from the
    "schemes" setting, else 'https'). The longest matching prefix wins.

    Returns a (group, settings-dict) tuple, or None if nothing matches.
    '''
    # Read configuration
    config = {}
    for key, val in ui.configitems('auth'):
        if '.' not in key:
            ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
            continue
        group, setting = key.rsplit('.', 1)
        gdict = config.setdefault(group, {})
        if setting in ('username', 'cert', 'key'):
            val = util.expandpath(val)
        gdict[setting] = val

    # Find the best match
    if '://' in uri:
        scheme, hostpath = uri.split('://', 1)
    else:
        # Some callers only have a bare host[:port][/path] to offer
        # (urllib2 hands the proxy authority to the password manager
        # this way); treat it as http instead of failing to unpack.
        scheme, hostpath = 'http', uri
    bestlen = 0
    bestauth = None
    for group, auth in config.items():
        prefix = auth.get('prefix')
        if not prefix:
            continue
        p = prefix.split('://', 1)
        if len(p) > 1:
            schemes, prefix = [p[0]], p[1]
        else:
            schemes = (auth.get('schemes') or 'https').split()
        if (prefix == '*' or hostpath.startswith(prefix)) and \
            len(prefix) > bestlen and scheme in schemes:
            bestlen = len(prefix)
            bestauth = group, auth
    return bestauth
Benoit Boissinot
|
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
# lookup sets used by quotepath(), built once at import time
_safeset = set(_safe)
_hex = set('abcdefABCDEF0123456789')

def quotepath(path):
    '''quote the path part of a URL

    This is similar to urllib.quote, but it also tries to avoid
    quoting things twice (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%20')
    'abc%20def%20'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'
    '''
    l = list(path)
    n = len(l)
    for i, c in enumerate(l):
        # a '%' followed by two hex digits is an existing escape: keep it
        # (the look-ahead only reads entries not rewritten yet)
        if (c == '%' and i + 2 < n and
            l[i + 1] in _hex and l[i + 2] in _hex):
            continue
        if c not in _safeset:
            l[i] = '%%%02X' % ord(c)
    return ''.join(l)
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    '''password manager that falls back to [auth] config and prompting'''

    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        '''return (user, passwd) for realm/authuri

        Lookup order: credentials already stored in the manager, then
        the best matching [auth] group (only when no user is known),
        then an interactive prompt for whatever is still missing.
        Raises util.Abort when prompting is needed but the ui is not
        interactive.
        '''
        base = urllib2.HTTPPasswordMgrWithDefaultRealm
        user, passwd = base.find_user_password(self, realm, authuri)
        if user and passwd:
            self._writedebug(user, passwd)
            return (user, passwd)

        if not user:
            found = readauthforuri(self.ui, authuri)
            if found:
                group, auth = found
                user = auth.get('username')
                passwd = auth.get('password')
                self.ui.debug("using auth.%s.* for authentication\n" % group)

        if not (user and passwd):
            if not self.ui.interactive():
                raise util.Abort(_('http authorization required'))

            self.ui.write(_("http authorization required\n"))
            self.ui.write(_("realm: %s\n") % realm)
            if user:
                self.ui.write(_("user: %s\n") % user)
            else:
                user = self.ui.prompt(_("user:"), default=None)

            if not passwd:
                passwd = self.ui.getpass()

        # remember the result so later requests do not ask again
        self.add_password(realm, authuri, user, passwd)
        self._writedebug(user, passwd)
        return (user, passwd)

    def _writedebug(self, user, passwd):
        '''log the user and a masked password at debug level'''
        msg = _('http auth: user %s, password %s\n')
        if passwd:
            shown = '*' * len(passwd)
        else:
            shown = 'not set'
        self.ui.debug(msg % (user, shown))
Benoit Boissinot
|
class proxyhandler(urllib2.ProxyHandler):
    '''ProxyHandler configured from the [http_proxy] section

    The proxy comes from http_proxy.host or, failing that, the
    http_proxy environment variable. Hosts in self.no_list are
    contacted directly.
    '''

    def __init__(self, ui):
        proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
        # XXX proxyauthinfo = None

        if proxyurl:
            # proxy can be proper url or host[:port]
            if not (proxyurl.startswith('http:') or
                    proxyurl.startswith('https:')):
                proxyurl = 'http://' + proxyurl + '/'
            proxy = url(proxyurl)
            # credentials embedded in the proxy URL win over the config
            if not proxy.user:
                proxy.user = ui.config("http_proxy", "user")
                proxy.passwd = ui.config("http_proxy", "passwd")

            # see if we should use a proxy for this url
            no_list = ["localhost", "127.0.0.1"]
            no_list.extend([p.lower() for
                            p in ui.configlist("http_proxy", "no")])
            no_list.extend([p.strip().lower() for
                            p in os.getenv("no_proxy", '').split(',')
                            if p.strip()])
            # "http_proxy.always" config is for running tests on localhost
            if ui.configbool("http_proxy", "always"):
                self.no_list = []
            else:
                self.no_list = no_list

            proxyurl = str(proxy)
            proxies = {'http': proxyurl, 'https': proxyurl}
            ui.debug('proxying through http://%s:%s\n' %
                      (proxy.host, proxy.port))
        else:
            # NOTE(review): self.no_list is not set on this path;
            # proxy_open reads it, so this relies on proxy_open not
            # being invoked when no proxies are registered - confirm
            proxies = {}

        # urllib2 takes proxy values from the environment and those
        # will take precedence if found, so drop them
        for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
            try:
                if env in os.environ:
                    del os.environ[env]
            except OSError:
                pass

        urllib2.ProxyHandler.__init__(self, proxies)
        self.ui = ui

    def proxy_open(self, req, proxy, type_):
        '''route req through proxy unless its host is in self.no_list'''
        # compare on the host name only, ignoring any ":port" suffix
        host = req.get_host().split(':')[0]
        if host in self.no_list:
            return None

        # work around a bug in Python < 2.4.2
        # (it leaves a "\n" at the end of Proxy-authorization headers)
        baseclass = req.__class__
        class _request(baseclass):
            def add_header(self, key, val):
                if key.lower() == 'proxy-authorization':
                    val = val.strip()
                return baseclass.add_header(self, key, val)
        # swap the request's class in place so the override applies
        req.__class__ = _request

        return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
Renato Cunha
|
class httpsendfile(object):
    """This is a wrapper around the objects returned by python's "open".

    Its purpose is to send file-like objects via HTTP and, to do so, it
    defines a __len__ attribute to feed the Content-Length header.

    Reads are reported to ui.progress under the 'sending' topic.
    """

    def __init__(self, ui, *args, **kwargs):
        """Open a file; args and kwargs are passed to the builtin open."""
        # We can't just "self._data = open(*args, **kwargs)" here because there
        # is an "open" function defined in this module that shadows the global
        # one
        self.ui = ui
        self._data = __builtin__.open(*args, **kwargs)
        self.seek = self._data.seek
        self.close = self._data.close
        self.write = self._data.write
        # size is sampled once, at open time
        self._len = os.fstat(self._data.fileno()).st_size
        self._pos = 0
        # progress total in kb, doubled (see the comment in read)
        self._total = len(self) // 1024 * 2

    def read(self, *args, **kwargs):
        """Read from the wrapped file and update the progress display.

        An empty result marks end of file: the progress topic is closed
        and the empty result is returned unchanged.
        """
        ret = self._data.read(*args, **kwargs)
        if not ret:
            # File objects report EOF with an empty read rather than by
            # raising EOFError, so this is where 'sending' is finished.
            self.ui.progress(_('sending'), None)
            return ret
        self._pos += len(ret)
        # We pass double the max for total because we currently have
        # to send the bundle twice in the case of a server that
        # requires authentication. Since we can't know until we try
        # once whether authentication will be required, just lie to
        # the user and maybe the push succeeds suddenly at 50%.
        self.ui.progress(_('sending'), self._pos // 1024,
                         unit=_('kb'), total=self._total)
        return ret

    def __len__(self):
        return self._len
Benoit Boissinot
|
r7270 | |||
Mads Kiilerich
|
def _gen_sendfile(orgsend):
    '''wrap the send method orgsend so it can stream httpsendfile data

    The returned method sends an httpsendfile chunk by chunk from its
    start; any other data is handed to orgsend unchanged.
    '''
    def _sendfile(self, data):
        if not isinstance(data, httpsendfile):
            orgsend(self, data)
            return
        # send a file
        # if auth required, some data sent twice, so rewind here
        data.seek(0)
        for piece in util.filechunkiter(data):
            orgsend(self, piece)
    return _sendfile
Henrik Stuart
|
# True when this Python's urllib2 provides HTTPS support
has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
    # Pick the SSL wrapper: the ssl module when it can be imported,
    # otherwise a fallback built on socket.ssl/httplib.FakeSocket that
    # cannot check certificates.
    try:
        # avoid using deprecated/broken FakeSocket in python 2.6
        import ssl
        _ssl_wrap_socket = ssl.wrap_socket
        CERT_REQUIRED = ssl.CERT_REQUIRED
    except ImportError:
        CERT_REQUIRED = 2

        def _ssl_wrap_socket(sock, key_file, cert_file,
                             cert_reqs=CERT_REQUIRED, ca_certs=None):
            # cert_reqs is accepted for signature compatibility only;
            # asking for CA verification (ca_certs) aborts
            if ca_certs:
                raise util.Abort(_(
                    'certificate checking requires Python 2.6'))

            ssl = socket.ssl(sock, key_file, cert_file)
            return httplib.FakeSocket(sock, ssl)

    # Use socket.create_connection when available, else a local copy.
    try:
        _create_connection = socket.create_connection
    except AttributeError:
        # sentinel meaning "no timeout given, leave the socket default"
        _GLOBAL_DEFAULT_TIMEOUT = object()

        def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT,
                               source_address=None):
            # lifted from Python 2.6
            msg = "getaddrinfo returns an empty list"
            host, port = address
            # try each resolved address until one connects
            for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)

                    if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    if source_address:
                        sock.bind(source_address)
                    sock.connect(sa)
                    return sock

                except socket.error, msg:
                    # remember the error (msg) and try the next address
                    if sock is not None:
                        sock.close()

            # every address failed: raise with the last error seen
            raise socket.error, msg
Benoit Boissinot
|
class httpconnection(keepalive.HTTPConnection):
    '''keepalive HTTP connection able to stream files and tunnel HTTPS'''

    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection.send)

    def connect(self):
        '''connect directly, or through a CONNECT proxy tunnel'''
        if not (has_https and self.realhostport):
            keepalive.HTTPConnection.connect(self)
            return
        # use CONNECT proxy
        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock = plain
        plain.connect((self.host, self.port))
        if _generic_proxytunnel(self):
            # we do not support client x509 certificates
            self.sock = _ssl_wrap_socket(self.sock, None, None)

    def getresponse(self):
        '''return a pending proxy response first, else the regular one'''
        pending = getattr(self, 'proxyres', None)
        if not pending:
            return keepalive.HTTPConnection.getresponse(self)
        if pending.will_close:
            self.close()
        # hand the stored proxy response out only once
        self.proxyres = None
        return pending
Henrik Stuart
|
r9852 | # general transaction handler to support different ways to handle | ||
# HTTPS proxying before and after Python 2.6.3. | ||||
def _generic_start_transaction(handler, h, req):
    '''prepare connection h for a possible CONNECT tunnel for req

    Sets h.realhostport ("host:port", port defaulting to 443) and
    h.headers when the request must be tunnelled through a proxy;
    otherwise both are set to None.
    '''
    new_tunnel = bool(hasattr(req, '_tunnel_host') and req._tunnel_host)
    if new_tunnel:
        tunnel_host = req._tunnel_host
        if tunnel_host[:7] not in ('http://', 'https:/'):
            tunnel_host = 'https://' + tunnel_host
    else:
        tunnel_host = req.get_selector()

    target = None
    # has proxy
    if new_tunnel or tunnel_host == req.get_full_url():
        parsed = url(tunnel_host)
        # only use CONNECT for HTTPS
        if new_tunnel or parsed.scheme == 'https':
            target = parsed

    if target is None:
        h.realhostport = None
        h.headers = None
        return

    h.realhostport = ':'.join([target.host, (target.port or '443')])
    h.headers = req.headers.copy()
    h.headers.update(handler.parent.addheaders)
def _generic_proxytunnel(self):
    '''issue a CONNECT request for self.realhostport over self.sock

    Returns True when the proxy answers 200 (the tunnel is open).
    Otherwise returns False; for any non-HTTP/0.9 reply the parsed
    proxy response is also stored in self.proxyres.
    Raises httplib.UnknownProtocol for an unrecognized HTTP version.
    '''
    # only the proxy-* request headers are forwarded to the proxy
    proxyheaders = dict(
            [(x, self.headers[x]) for x in self.headers
             if x.lower().startswith('proxy-')])
    self._set_hostport(self.host, self.port)
    self.send('CONNECT %s HTTP/1.0\r\n' % self.realhostport)
    for header in proxyheaders.iteritems():
        self.send('%s: %s\r\n' % header)
    self.send('\r\n')

    # majority of the following code is duplicated from
    # httplib.HTTPConnection as there are no adequate places to
    # override functions to provide the needed functionality
    res = self.response_class(self.sock,
                              strict=self.strict,
                              method=self._method)

    # read status lines, skipping over any "100 Continue" responses
    while True:
        version, status, reason = res._read_status()
        if status != httplib.CONTINUE:
            break
        # discard the header lines of the 100 response
        while True:
            skip = res.fp.readline().strip()
            if not skip:
                break
    res.status = status
    res.reason = reason.strip()

    if res.status == 200:
        # consume the rest of the proxy's reply up to the blank line
        # NOTE(review): only an exact '\r\n' ends this loop; if the
        # stream ends first readline() presumably keeps returning ''
        # and the loop would not terminate - confirm
        while True:
            line = res.fp.readline()
            if line == '\r\n':
                break
        return True

    if version == 'HTTP/1.0':
        res.version = 10
    elif version.startswith('HTTP/1.'):
        res.version = 11
    elif version == 'HTTP/0.9':
        res.version = 9
    else:
        raise httplib.UnknownProtocol(version)

    if res.version == 9:
        res.length = None
        res.chunked = 0
        res.will_close = 1
        res.msg = httplib.HTTPMessage(cStringIO.StringIO())
        return False

    res.msg = httplib.HTTPMessage(res.fp)
    res.msg.fp = None

    # are we using the chunked-style of transfer encoding?
    trenc = res.msg.getheader('transfer-encoding')
    if trenc and trenc.lower() == "chunked":
        res.chunked = 1
        res.chunk_left = None
    else:
        res.chunked = 0

    # will the connection close at the end of the response?
    res.will_close = res._check_close()

    # do we have a Content-Length?
    # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
    length = res.msg.getheader('content-length')
    if length and not res.chunked:
        try:
            res.length = int(length)
        except ValueError:
            res.length = None
        else:
            if res.length < 0:  # ignore nonsensical negative lengths
                res.length = None
    else:
        res.length = None

    # does the body have a fixed length? (of zero)
    if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
        100 <= status < 200 or # 1xx codes
        res._method == 'HEAD'):
        res.length = 0

    # if the connection remains open, and we aren't using chunked, and
    # a content-length was not provided, then assume that the connection
    # WILL close.
    if (not res.will_close and
       not res.chunked and
       res.length is None):
        res.will_close = 1

    # keep the failed response so it can be picked up from self.proxyres
    self.proxyres = res

    return False
Benoit Boissinot
|
class httphandler(keepalive.HTTPHandler):
    '''keepalive HTTP handler that opens httpconnection objects'''

    def http_open(self, req):
        '''open req using httpconnection as the connection class'''
        response = self.do_open(httpconnection, req)
        return response

    def _start_transaction(self, h, req):
        '''set up proxy tunnel state on h, then start the transaction'''
        _generic_start_transaction(self, h, req)
        parent = keepalive.HTTPHandler
        return parent._start_transaction(self, h, req)
Mads Kiilerich
|
r12592 | def _verifycert(cert, hostname): | ||
Mads Kiilerich
|
r12742 | '''Verify that cert (in socket.getpeercert() format) matches hostname. | ||
Yuya Nishihara
|
r13249 | CRLs is not handled. | ||
Martin Geisler
|
r12770 | |||
Mads Kiilerich
|
r12592 | Returns error message if any problems are found and None on success. | ||
''' | ||||
if not cert: | ||||
return _('no certificate received') | ||||
dnsname = hostname.lower() | ||||
Yuya Nishihara
|
r13249 | def matchdnsname(certname): | ||
return (certname == dnsname or | ||||
'.' in dnsname and certname == '*.' + dnsname.split('.', 1)[1]) | ||||
san = cert.get('subjectAltName', []) | ||||
if san: | ||||
certnames = [value.lower() for key, value in san if key == 'DNS'] | ||||
for name in certnames: | ||||
if matchdnsname(name): | ||||
return None | ||||
return _('certificate is for %s') % ', '.join(certnames) | ||||
# subject is only checked when subjectAltName is empty | ||||
Mads Kiilerich
|
r12592 | for s in cert.get('subject', []): | ||
key, value = s[0] | ||||
if key == 'commonName': | ||||
Yuya Nishihara
|
r13248 | try: | ||
# 'subject' entries are unicode | ||||
certname = value.lower().encode('ascii') | ||||
except UnicodeEncodeError: | ||||
return _('IDN in certificate not supported') | ||||
Yuya Nishihara
|
r13249 | if matchdnsname(certname): | ||
Mads Kiilerich
|
r12592 | return None | ||
return _('certificate is for %s') % certname | ||||
Yuya Nishihara
|
r13249 | return _('no commonName or subjectAltName found in certificate') | ||
Mads Kiilerich
|
r12592 | |||
Benoit Boissinot
|
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        '''HTTPS connection that checks the server certificate

        The connection's ui attribute is read for configuration; it is
        assigned by httpshandler._makeconnection.
        '''
        response_class = keepalive.HTTPResponse
        # must be able to send big bundle as stream.
        send = _gen_sendfile(keepalive.safesend)
        getresponse = keepalive.wrapgetresponse(httplib.HTTPSConnection)

        def connect(self):
            '''connect, wrap the socket in SSL and check the peer

            With web.cacerts set and no fingerprint configured for the
            host, the certificate is validated against the CA file and
            its name checked with _verifycert. Otherwise a configured
            hostfingerprints entry is compared with the certificate's
            SHA-1 fingerprint, or a warning is issued when there is
            nothing to check against. Raises util.Abort on failure.
            '''
            self.sock = _create_connection((self.host, self.port))

            host = self.host
            if self.realhostport: # use CONNECT proxy
                # NOTE(review): the tunnel result is stored but never
                # examined, so a refused CONNECT is not acted on here
                something = _generic_proxytunnel(self)
                # certificate checks apply to the tunnelled host
                host = self.realhostport.rsplit(':', 1)[0]

            cacerts = self.ui.config('web', 'cacerts')
            hostfingerprint = self.ui.config('hostfingerprints', host)

            if cacerts and not hostfingerprint:
                cacerts = util.expandpath(cacerts)
                if not os.path.exists(cacerts):
                    raise util.Abort(_('could not find '
                                       'web.cacerts: %s') % cacerts)
                self.sock = _ssl_wrap_socket(self.sock, self.key_file,
                    self.cert_file, cert_reqs=CERT_REQUIRED,
                    ca_certs=cacerts)
                msg = _verifycert(self.sock.getpeercert(), host)
                if msg:
                    raise util.Abort(_('%s certificate error: %s '
                                       '(use --insecure to connect '
                                       'insecurely)') % (host, msg))
                self.ui.debug('%s certificate successfully verified\n' % host)
            else:
                self.sock = _ssl_wrap_socket(self.sock, self.key_file,
                    self.cert_file)
                if hasattr(self.sock, 'getpeercert'):
                    # True asks for the raw (binary) certificate
                    peercert = self.sock.getpeercert(True)
                    peerfingerprint = util.sha1(peercert).hexdigest()
                    # colon-separated hex pairs for display
                    nicefingerprint = ":".join([peerfingerprint[x:x + 2]
                        for x in xrange(0, len(peerfingerprint), 2)])
                    if hostfingerprint:
                        # compare ignoring case and ':' separators
                        if peerfingerprint.lower() != \
                                hostfingerprint.replace(':', '').lower():
                            raise util.Abort(_('invalid certificate for %s '
                                               'with fingerprint %s') %
                                             (host, nicefingerprint))
                        self.ui.debug('%s certificate matched fingerprint %s\n' %
                                      (host, nicefingerprint))
                    else:
                        self.ui.warn(_('warning: %s certificate '
                                       'with fingerprint %s not verified '
                                       '(check hostfingerprints or web.cacerts '
                                       'config setting)\n') %
                                     (host, nicefingerprint))
                else: # python 2.5 ?
                    if hostfingerprint:
                        raise util.Abort(_('no certificate for %s with '
                                           'configured hostfingerprint') % host)
                    self.ui.warn(_('warning: %s certificate not verified '
                                   '(check web.cacerts config setting)\n') %
                                 host)

    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        '''keepalive HTTPS handler creating httpsconnection objects'''

        def __init__(self, ui):
            keepalive.KeepAliveHandler.__init__(self)
            urllib2.HTTPSHandler.__init__(self)
            self.ui = ui
            self.pwmgr = passwordmgr(self.ui)

        def _start_transaction(self, h, req):
            '''set up proxy tunnel state on h, then start the transaction'''
            _generic_start_transaction(self, h, req)
            return keepalive.KeepAliveHandler._start_transaction(self, h, req)

        def https_open(self, req):
            '''open req, remembering the matching [auth] group, if any'''
            res = readauthforuri(self.ui, req.get_full_url())
            if res:
                group, auth = res
                # read back by _makeconnection for the key/cert files
                self.auth = auth
                self.ui.debug("using auth.%s.* for authentication\n" % group)
            else:
                self.auth = None
            return self.do_open(self._makeconnection, req)

        def _makeconnection(self, host, port=None, *args, **kwargs):
            '''create the httpsconnection used for a request

            The first two extra positional arguments, when given, are
            taken as key_file and cert_file.
            '''
            keyfile = None
            certfile = None

            if len(args) >= 1: # key_file
                keyfile = args[0]
            if len(args) >= 2: # cert_file
                certfile = args[1]
            args = args[2:]

            # if the user has specified different key/cert files in
            # hgrc, we prefer these
            if self.auth and 'key' in self.auth and 'cert' in self.auth:
                keyfile = self.auth['key']
                certfile = self.auth['cert']

            conn = httpsconnection(host, port, keyfile, certfile, *args, **kwargs)
            conn.ui = self.ui
            return conn
Benoit Boissinot
|
r7270 | |||
class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler): | ||||
Mads Kiilerich
|
r11457 | def __init__(self, *args, **kwargs): | ||
urllib2.HTTPDigestAuthHandler.__init__(self, *args, **kwargs) | ||||
self.retried_req = None | ||||
def reset_retry_count(self): | ||||
# Python 2.6.5 will call this on 401 or 407 errors and thus loop | ||||
# forever. We disable reset_retry_count completely and reset in | ||||
# http_error_auth_reqed instead. | ||||
pass | ||||
Benoit Boissinot
|
r7270 | def http_error_auth_reqed(self, auth_header, host, req, headers): | ||
Mads Kiilerich
|
r11457 | # Reset the retry counter once for each request. | ||
if req is not self.retried_req: | ||||
self.retried_req = req | ||||
self.retried = 0 | ||||
# In python < 2.5 AbstractDigestAuthHandler raises a ValueError if | ||||
# it doesn't know about the auth type requested. This can happen if | ||||
# somebody is using BasicAuth and types a bad password. | ||||
Benoit Boissinot
|
r7270 | try: | ||
return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed( | ||||
self, auth_header, host, req, headers) | ||||
except ValueError, inst: | ||||
arg = inst.args[0] | ||||
if arg.startswith("AbstractDigestAuthHandler doesn't know "): | ||||
return | ||||
raise | ||||
Wagner Bruna
|
class httpbasicauthhandler(urllib2.HTTPBasicAuthHandler):
    """Basic auth handler that resets its retry counter once per
    request instead of through reset_retry_count()."""

    def __init__(self, *args, **kwargs):
        urllib2.HTTPBasicAuthHandler.__init__(self, *args, **kwargs)
        # the request the current retry count belongs to
        self.retried_req = None

    def reset_retry_count(self):
        """Do nothing.

        Python 2.6.5 will call this on 401 or 407 errors and thus loop
        forever. Resetting is disabled here completely and done in
        http_error_auth_reqed instead.
        """

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Defer to urllib2.HTTPBasicAuthHandler, starting a fresh
        retry count whenever a new request object is seen."""
        # Reset the retry counter once for each request.
        if self.retried_req is not req:
            self.retried = 0
            self.retried_req = req
        parent = urllib2.HTTPBasicAuthHandler.http_error_auth_reqed
        return parent(self, auth_header, host, req, headers)
Henrik Stuart
|
# Extra handler factories: opener() calls each entry as h(ui, passmgr)
# and adds the result to the handlers given to urllib2.build_opener().
handlerfuncs = []
Benoit Boissinot
|
def opener(ui, authinfo=None):
    '''
    construct an opener suitable for urllib2
    authinfo will be added to the password manager

    The handlers are, in order: the http handler, the https handler
    (only when has_https is true), the proxy handler, the basic and
    digest auth handlers, and one handler per entry in handlerfuncs.
    '''
    chain = [httphandler()]
    if has_https:
        chain.append(httpshandler(ui))
    chain.append(proxyhandler(ui))

    passmgr = passwordmgr(ui)
    if authinfo is not None:
        passmgr.add_password(*authinfo)
        user, passwd = authinfo[2:4]
        # only a mask as long as the password is logged, never the
        # password itself
        if passwd:
            shown = '*' * len(passwd)
        else:
            shown = 'not set'
        ui.debug('http auth: user %s, password %s\n' % (user, shown))

    chain.append(httpbasicauthhandler(passmgr))
    chain.append(httpdigestauthhandler(passmgr))
    for factory in handlerfuncs:
        chain.append(factory(ui, passmgr))

    built = urllib2.build_opener(*chain)
    # 1.0 here is the _protocol_ version
    built.addheaders = [('User-agent', 'mercurial/proto-1.0'),
                        ('Accept', 'application/mercurial-0.1')]
    return built
Brodie Rao
|
def open(ui, url_, data=None):
    '''
    open url_ through an opener built by opener() and return the result

    With a scheme, the scheme is lowercased and the URL and authinfo
    come from url.authinfo(). Without one, url_ is treated as a local
    path and turned into an absolute file:// URL with no authinfo.
    '''
    parsed = url(url_)
    if not parsed.scheme:
        authinfo = None
        localpath = util.normpath(os.path.abspath(url_))
        url_ = 'file://' + urllib.pathname2url(localpath)
    else:
        parsed.scheme = parsed.scheme.lower()
        url_, authinfo = parsed.authinfo()
    return opener(ui, authinfo).open(url_, data)