##// END OF EJS Templates
httpconnection: correctly handle redirects from http to https...
Augie Fackler -
r14346:bf85c263 default
parent child Browse files
Show More
@@ -1,264 +1,265 b''
1 # httpconnection.py - urllib2 handler for new http support
1 # httpconnection.py - urllib2 handler for new http support
2 #
2 #
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 # Copyright 2011 Google, Inc.
6 # Copyright 2011 Google, Inc.
7 #
7 #
8 # This software may be used and distributed according to the terms of the
8 # This software may be used and distributed according to the terms of the
9 # GNU General Public License version 2 or any later version.
9 # GNU General Public License version 2 or any later version.
10 import logging
10 import logging
11 import socket
11 import socket
12 import urllib
12 import urllib
13 import urllib2
13 import urllib2
14 import os
14 import os
15
15
16 from mercurial import httpclient
16 from mercurial import httpclient
17 from mercurial import sslutil
17 from mercurial import sslutil
18 from mercurial import util
18 from mercurial import util
19 from mercurial.i18n import _
19 from mercurial.i18n import _
20
20
21 # moved here from url.py to avoid a cycle
21 # moved here from url.py to avoid a cycle
class httpsendfile(object):
    """Wrap a file opened with the builtin open() so it can be sent via HTTP.

    The wrapper defines __len__ (the size reported by os.fstat) to feed
    the Content-Length header, and reports upload progress through
    ui.progress() each time read() returns data.
    """

    def __init__(self, ui, *args, **kwargs):
        """Open the file and record its size.

        ui is kept for progress reporting; every other argument is
        passed unchanged to the builtin open().
        """
        self.ui = ui
        self._data = open(*args, **kwargs)
        # Expose the underlying file's methods directly.
        self.seek = self._data.seek
        self.close = self._data.close
        self.write = self._data.write
        self._len = os.fstat(self._data.fileno()).st_size
        # Bytes handed out by read() so far. seek() is the raw file
        # method and does not reset this, so a body that is rewound and
        # sent again keeps advancing the same progress counter.
        self._pos = 0
        # Progress total in kilobytes, doubled (see read()). Use
        # self._len rather than len(self): len() raises OverflowError
        # for sizes that do not fit in Py_ssize_t. Floor division keeps
        # the value an integer.
        self._total = self._len // 1024 * 2

    def read(self, *args, **kwargs):
        """Read from the wrapped file and update the 'sending' progress.

        Arguments are forwarded to the underlying file's read(). If
        that read raises EOFError, the progress topic is updated with a
        position of None and an empty string is returned.
        """
        try:
            ret = self._data.read(*args, **kwargs)
        except EOFError:
            # This branch used to fall through to len(ret) with 'ret'
            # unbound. NOTE(review): a None position presumably marks
            # the topic as finished - confirm against ui.progress.
            self.ui.progress(_('sending'), None)
            return ''
        self._pos += len(ret)
        # We pass double the max for total because we currently have
        # to send the bundle twice in the case of a server that
        # requires authentication. Since we can't know until we try
        # once whether authentication will be required, just lie to
        # the user and maybe the push succeeds suddenly at 50%.
        self.ui.progress(_('sending'), self._pos // 1024,
                         unit=_('kb'), total=self._total)
        return ret

    def __len__(self):
        """Return the file size in bytes measured when it was opened."""
        return self._len
59
59
60 # moved here from url.py to avoid a cycle
60 # moved here from url.py to avoid a cycle
def readauthforuri(ui, uri):
    """Return the [auth] group that best matches uri, or None.

    The [auth] section is read from ui as 'group.setting' keys. A group
    is a candidate when its 'prefix' is '*' or a leading part of the
    URI after the scheme, and the URI scheme is allowed: either the
    scheme embedded in the prefix, or the group's 'schemes' setting
    (default 'https'). The candidate with the longest prefix wins and
    is returned as a (group, settings) tuple.
    """
    # Collect the [auth] section into {group: {setting: value}}.
    groups = {}
    for name, value in ui.configitems('auth'):
        if '.' not in name:
            ui.warn(_("ignoring invalid [auth] key '%s'\n") % name)
            continue
        group, setting = name.rsplit('.', 1)
        if setting in ('username', 'cert', 'key'):
            value = util.expandpath(value)
        groups.setdefault(group, {})[setting] = value

    # Pick the candidate with the longest matching prefix.
    scheme, hostpath = uri.split('://', 1)
    winner = None
    winnerlen = 0
    for group, settings in groups.items():
        prefix = settings.get('prefix')
        if not prefix:
            continue
        pieces = prefix.split('://', 1)
        if len(pieces) == 2:
            # A scheme written into the prefix overrides 'schemes'.
            schemes = [pieces[0]]
            prefix = pieces[1]
        else:
            schemes = (settings.get('schemes') or 'https').split()
        if len(prefix) <= winnerlen or scheme not in schemes:
            continue
        if prefix == '*' or hostpath.startswith(prefix):
            winner = (group, settings)
            winnerlen = len(prefix)
    return winner
92
92
93 # Mercurial (at least until we can remove the old codepath) requires
93 # Mercurial (at least until we can remove the old codepath) requires
94 # that the http response object be sufficiently file-like, so we
94 # that the http response object be sufficiently file-like, so we
95 # provide a close() method here.
95 # provide a close() method here.
class HTTPResponse(httpclient.HTTPResponse):
    """httpclient response with the close() method file-like users call."""

    def close(self):
        """Do nothing; exists only so the response looks file-like."""
        return None
99
99
class HTTPConnection(httpclient.HTTPConnection):
    """httpclient connection that rewinds httpsendfile bodies before use."""
    response_class = HTTPResponse

    def request(self, method, uri, body=None, headers=None):
        """Send a request, rewinding body first if it is an httpsendfile.

        headers defaults to a fresh empty dict on every call; a shared
        mutable default would leak state between requests if the
        parent implementation ever modified it.
        """
        if headers is None:
            headers = {}
        if isinstance(body, httpsendfile):
            # The same file object may be sent more than once (for
            # example after an authentication challenge), so always
            # start from the beginning.
            body.seek(0)
        httpclient.HTTPConnection.request(self, method, uri, body=body,
                                          headers=headers)
107
107
108
108
# Set to True once the 'mercurial.httpclient' logger has been set up, so
# that creating several handlers does not stack duplicate log handlers.
_configuredlogging = False

# Subclass BOTH of these because otherwise urllib2 "helpfully"
# reinserts them since it notices we don't include any subclasses of
# them.
class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler):
    """urllib2 handler that sends http and https requests via httpclient.

    Open connections are kept in self._connections, keyed by
    (use_ssl, host, proxy), and reused when they are not busy.
    """

    def __init__(self, ui, pwmgr):
        """Store ui and pwmgr and optionally enable httpclient logging.

        Logging is configured at most once per process, and only when
        the ui.http2debuglevel config option is set to a level name
        known to the logging module.
        """
        global _configuredlogging
        urllib2.AbstractHTTPHandler.__init__(self)
        self.ui = ui
        self.pwmgr = pwmgr
        self._connections = {}
        loglevel = ui.config('ui', 'http2debuglevel', default=None)
        if loglevel and not _configuredlogging:
            _configuredlogging = True
            logger = logging.getLogger('mercurial.httpclient')
            logger.setLevel(getattr(logging, loglevel.upper()))
            logger.addHandler(logging.StreamHandler())

    def close_all(self):
        """Close and remove all connection objects being kept for reuse."""
        for openconns in self._connections.values():
            for conn in openconns:
                conn.close()
        self._connections = {}

    # shamelessly borrowed from urllib2.AbstractHTTPHandler
    def do_open(self, http_class, req, use_ssl):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        use_ssl is only used as part of the connection cache key, so
        that plain and SSL connections to the same host are never
        mixed up (for example when http redirects to https).

        The addinfourl return value is a file-like object. It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code

        Raises urllib2.URLError when no host is given or when a
        socket.error occurs while sending the request.
        """
        # If using a proxy, the host returned by get_host() is
        # actually the proxy. On Python 2.6.1, the real destination
        # hostname is encoded in the URI in the urllib2 request
        # object. On Python 2.6.5, it's stored in the _tunnel_host
        # attribute which has no accessor.
        tunhost = getattr(req, '_tunnel_host', None)
        host = req.get_host()
        if tunhost:
            proxyhost = host
            host = tunhost
        elif req.has_proxy():
            proxyhost = req.get_host()
            host = req.get_selector().split('://', 1)[1].split('/', 1)[0]
        else:
            proxyhost = None

        if proxyhost:
            if ':' in proxyhost:
                # Note: this means we'll explode if we try and use an
                # IPv6 http proxy. This isn't a regression, so we
                # won't worry about it for now.
                proxyhost, proxyport = proxyhost.rsplit(':', 1)
            else:
                proxyport = 3128 # squid default
            proxy = (proxyhost, proxyport)
        else:
            proxy = None

        if not host:
            raise urllib2.URLError('no host given')

        # Reuse an idle connection with the same scheme, host and proxy
        # if there is one; otherwise open a new one and remember it.
        connkey = use_ssl, host, proxy
        allconns = self._connections.get(connkey, [])
        conns = [c for c in allconns if not c.busy()]
        if conns:
            h = conns[0]
        else:
            if allconns:
                self.ui.debug('all connections for %s busy, making a new '
                              'one\n' % host)
            timeout = None
            if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                timeout = req.timeout
            h = http_class(host, timeout=timeout, proxy_hostport=proxy)
            self._connections.setdefault(connkey, []).append(h)

        # Merge both header sets and normalize the header name casing.
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            path = req.get_selector()
            if '://' in path:
                # Absolute URI (proxy case): keep only what follows the
                # host. partition() yields '' instead of raising when
                # the URI has no path component at all.
                path = path.split('://', 1)[1].partition('/')[2]
            # startswith() also copes with an empty path.
            if not path.startswith('/'):
                path = '/' + path
            h.request(req.get_method(), path, req.data, headers)
            r = h.getresponse()
        except socket.error as err: # XXX what error?
            raise urllib2.URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.
        r.recv = r.read

        resp = urllib.addinfourl(r, r.headers, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp

    # httplib always uses the given host/port as the socket connect
    # target, and then allows full URIs in the request path, which it
    # then observes and treats as a signal to do proxying instead.
    def http_open(self, req):
        """Open req over plain http, or hand it to https_open().

        Requests whose full URL starts with 'https' are delegated to
        https_open() even though they arrived through this method.
        """
        if req.get_full_url().startswith('https'):
            return self.https_open(req)
        return self.do_open(HTTPConnection, req, False)

    def https_open(self, req):
        """Open req over SSL using any matching [auth] settings.

        The matching auth settings (or None) are stored on self.auth,
        where _makesslconnection() reads them.
        """
        res = readauthforuri(self.ui, req.get_full_url())
        if res:
            group, auth = res
            self.auth = auth
            self.ui.debug("using auth.%s.* for authentication\n" % group)
        else:
            self.auth = None
        return self.do_open(self._makesslconnection, req, True)

    def _makesslconnection(self, host, port=443, *args, **kwargs):
        """Build an SSL-enabled HTTPConnection for host.

        Up to two extra positional arguments are read as key_file and
        cert_file. A key/cert pair from the matching [auth] group takes
        precedence over them, and a port embedded in host takes
        precedence over the port argument.
        """
        keyfile = None
        certfile = None

        # *args arrives as a tuple, which has no pop(); work on a list.
        args = list(args)
        if args: # key_file
            keyfile = args.pop(0)
        if args: # cert_file
            certfile = args.pop(0)

        # if the user has specified different key/cert files in
        # hgrc, we prefer these
        if self.auth and 'key' in self.auth and 'cert' in self.auth:
            keyfile = self.auth['key']
            certfile = self.auth['cert']

        # let host port take precedence
        if ':' in host and '[' not in host or ']:' in host:
            host, port = host.rsplit(':', 1)
            port = int(port)
            if '[' in host:
                # strip the brackets around an IPv6 literal
                host = host[1:-1]

        if keyfile:
            kwargs['keyfile'] = keyfile
        if certfile:
            kwargs['certfile'] = certfile

        kwargs.update(sslutil.sslkwargs(self.ui, host))

        con = HTTPConnection(host, port, use_ssl=True,
                             ssl_validator=sslutil.validator(self.ui, host),
                             **kwargs)
        return con
General Comments 0
You need to be logged in to leave comments. Login now