##// END OF EJS Templates
url: use CONNECT for HTTPS connections through HTTP proxy (issue967)...
Henrik Stuart -
r8590:59acb9c7 default
parent child Browse files
Show More
@@ -1,349 +1,496
1 1 # url.py - HTTP handling for mercurial
2 2 #
3 3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2, incorporated herein by reference.
9 9
10 import urllib, urllib2, urlparse, httplib, os, re
10 import urllib, urllib2, urlparse, httplib, os, re, socket, cStringIO
11 11 from i18n import _
12 12 import keepalive, util
13 13
def hidepassword(url):
    '''return url with the password in its netloc replaced by ***'''
    parts = list(urlparse.urlparse(url))
    # parts[1] is the netloc: mask whatever sits between the first ':'
    # and the following '@'; a netloc without credentials is unchanged
    parts[1] = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', parts[1])
    return urlparse.urlunparse(tuple(parts))
19 19
def removeauth(url):
    '''return url with the user[:passwd]@ part of its netloc dropped'''
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    # keep what follows the first '@'; without an '@' the netloc is kept
    # as it is
    hostport = netloc.split('@', 1)[-1]
    pieces = (scheme, hostport, path, params, query, fragment)
    return urlparse.urlunparse(pieces)
25 25
def netlocsplit(netloc):
    '''split [user[:passwd]@]host[:port] into (host, port, user, passwd).

    Missing components are returned as None.  user and passwd are
    URL-unquoted; host and port are returned as found (port is a string).
    '''
    user = passwd = None
    at = netloc.find('@')
    if at != -1:
        credentials = netloc[:at]
        netloc = netloc[at + 1:]
        # only the first ':' separates user from password
        pieces = credentials.split(':', 1)
        user = urllib.unquote(pieces[0])
        if len(pieces) == 2:
            passwd = urllib.unquote(pieces[1])

    # likewise only the first ':' separates host from port
    hostport = netloc.split(':', 1)
    host = hostport[0]
    port = None
    if len(hostport) == 2:
        port = hostport[1]
    return host, port, user, passwd
46 46
def netlocunsplit(host, port, user=None, passwd=None):
    '''build [user[:passwd]@]host[:port] from its components.

    port is appended only when truthy; the credentials are URL-quoted and
    only emitted when user is truthy (passwd alone is ignored).
    '''
    hostport = host
    if port:
        hostport = host + ':' + port
    if not user:
        return hostport

    userpass = urllib.quote(user)
    if passwd:
        userpass = userpass + ':' + urllib.quote(passwd)
    return userpass + '@' + hostport
60 60
# characters that quotepath() never escapes
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
# lookup sets, built on the first call to quotepath()
_safeset = None
_hex = None
def quotepath(path):
    '''quote the path part of a URL

    Works like urllib.quote, except that a '%' followed by two hex
    digits is taken to be an existing escape and left alone, so already
    quoted input is not quoted a second time (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%20')
    'abc%20def%20'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'
    '''
    global _safeset, _hex
    if _safeset is None:
        _safeset = set(_safe)
        _hex = set('abcdefABCDEF0123456789')

    end = len(path)
    quoted = []
    for pos, char in enumerate(path):
        # an existing %XX escape needs two hex digits after the '%'
        escaped = (char == '%' and pos + 2 < end and
                   path[pos + 1] in _hex and path[pos + 2] in _hex)
        if escaped or char in _safeset:
            quoted.append(char)
        else:
            quoted.append('%%%02X' % ord(char))
    return ''.join(quoted)
97 97
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    '''password manager that falls back to the [auth] config section and
    then to prompting the user'''

    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        '''return (user, passwd) to use for realm at authuri.

        Credentials already stored in the manager are used when both the
        user and the password are known.  If no user is known, the [auth]
        config section is consulted; whatever is still missing is asked
        from the user.  Raises util.Abort when prompting would be needed
        but the ui is not interactive.  Credentials found this way are
        stored with add_password().
        '''
        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
            self, realm, authuri)
        user, passwd = authinfo
        if user and passwd:
            self._writedebug(user, passwd)
            return (user, passwd)

        if not user:
            user, passwd = self._readauthtoken(authuri)
        if not user or not passwd:
            if not self.ui.interactive():
                raise util.Abort(_('http authorization required'))

            self.ui.write(_("http authorization required\n"))
            self.ui.status(_("realm: %s\n") % realm)
            if user:
                self.ui.status(_("user: %s\n") % user)
            else:
                user = self.ui.prompt(_("user:"), default=None)

            if not passwd:
                passwd = self.ui.getpass()

        self.add_password(realm, authuri, user, passwd)
        self._writedebug(user, passwd)
        return (user, passwd)

    def _writedebug(self, user, passwd):
        '''log the user name and a masked password to the debug output'''
        msg = _('http auth: user %s, password %s\n')
        self.ui.debug(msg % (user, passwd and '*' * len(passwd) or 'not set'))

    def _readauthtoken(self, uri):
        '''return the (username, password) of the best [auth] match for uri.

        [auth] entries are named "group.setting".  A group matches when its
        prefix is '*' or a prefix of the scheme-less uri, and the uri scheme
        is one of the group's schemes (taken from the prefix itself if it
        has one, else from the "schemes" setting, else 'https').  The
        longest matching prefix wins.  Returns (None, None) if nothing
        matches.
        '''
        # Read configuration
        config = dict()
        for key, val in self.ui.configitems('auth'):
            if '.' not in key:
                # a key without a group cannot be unpacked below; skip it
                # instead of aborting the whole request with a ValueError
                self.ui.debug(_("ignoring invalid [auth] key '%s'\n") % key)
                continue
            group, setting = key.split('.', 1)
            gdict = config.setdefault(group, dict())
            gdict[setting] = val

        # Find the best match
        if '://' in uri:
            scheme, hostpath = uri.split('://', 1)
        else:
            # some urllib2 versions look passwords up by netloc only; with
            # no scheme nothing below can match, so (None, None) is returned
            scheme, hostpath = None, uri
        bestlen = 0
        bestauth = None, None
        for auth in config.values():
            prefix = auth.get('prefix')
            if not prefix:
                continue
            p = prefix.split('://', 1)
            if len(p) > 1:
                schemes, prefix = [p[0]], p[1]
            else:
                schemes = (auth.get('schemes') or 'https').split()
            if (prefix == '*' or hostpath.startswith(prefix)) and \
                len(prefix) > bestlen and scheme in schemes:
                bestlen = len(prefix)
                bestauth = auth.get('username'), auth.get('password')
        return bestauth
160 160
class proxyhandler(urllib2.ProxyHandler):
    '''proxy handler configured from the [http_proxy] section or the
    http_proxy environment variable, with a list of hosts that must not
    be proxied'''

    def __init__(self, ui):
        proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
        # XXX proxyauthinfo = None

        # always defined, so proxy_open() can rely on it
        self.no_list = []

        if proxyurl:
            # proxy can be proper url or host[:port]
            if not (proxyurl.startswith('http:') or
                    proxyurl.startswith('https:')):
                proxyurl = 'http://' + proxyurl + '/'
            snpqf = urlparse.urlsplit(proxyurl)
            proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
            hpup = netlocsplit(proxynetloc)

            proxyhost, proxyport, proxyuser, proxypasswd = hpup
            if not proxyuser:
                # no credentials in the url itself: fall back to the config
                proxyuser = ui.config("http_proxy", "user")
                proxypasswd = ui.config("http_proxy", "passwd")

            # see if we should use a proxy for this url
            no_list = [ "localhost", "127.0.0.1" ]
            no_list.extend([p.lower() for
                            p in ui.configlist("http_proxy", "no")])
            no_list.extend([p.strip().lower() for
                            p in os.getenv("no_proxy", '').split(',')
                            if p.strip()])
            # "http_proxy.always" config is for running tests on localhost
            if not ui.configbool("http_proxy", "always"):
                self.no_list = no_list

            proxyurl = urlparse.urlunsplit((
                proxyscheme, netlocunsplit(proxyhost, proxyport,
                                           proxyuser, proxypasswd or ''),
                proxypath, proxyquery, proxyfrag))
            proxies = {'http': proxyurl, 'https': proxyurl}
            ui.debug(_('proxying through http://%s:%s\n') %
                     (proxyhost, proxyport))
        else:
            proxies = {}

        # urllib2 takes proxy values from the environment and those
        # will take precedence if found, so drop them
        for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
            try:
                if env in os.environ:
                    del os.environ[env]
            except OSError:
                pass

        urllib2.ProxyHandler.__init__(self, proxies)
        self.ui = ui

    def proxy_open(self, req, proxy, type_):
        '''return None for hosts on the no-proxy list, else defer to
        urllib2.ProxyHandler.proxy_open'''
        # no_list entries are stored lowercased, so compare the request
        # host case-insensitively as well
        host = req.get_host().split(':')[0].lower()
        if host in self.no_list:
            return None

        # work around a bug in Python < 2.4.2
        # (it leaves a "\n" at the end of Proxy-authorization headers)
        baseclass = req.__class__
        class _request(baseclass):
            def add_header(self, key, val):
                if key.lower() == 'proxy-authorization':
                    val = val.strip()
                return baseclass.add_header(self, key, val)
        req.__class__ = _request

        return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
231 231
class httpsendfile(file):
    '''file object whose len() is the size of the file on disk'''
    def __len__(self):
        # the size is taken from fstat() on the open file descriptor
        descriptor = self.fileno()
        return os.fstat(descriptor).st_size
235 235
def _gen_sendfile(connection):
    '''return a send() method that delegates to connection.send and
    streams httpsendfile objects chunk by chunk'''
    def _sendfile(self, data):
        if not isinstance(data, httpsendfile):
            # anything else goes to the wrapped send() in one call
            connection.send(self, data)
            return
        # send a file
        # if auth required, some data sent twice, so rewind here
        data.seek(0)
        for piece in util.filechunkiter(data):
            connection.send(self, piece)
    return _sendfile
247 247
# has_https records whether urllib2 provides an HTTPSHandler
has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
    try:
        # avoid using deprecated/broken FakeSocket in python 2.6
        import ssl
    except ImportError:
        # no ssl module: wrap the socket the old way
        def _ssl_wrap_socket(sock, key_file, cert_file):
            sslobj = socket.ssl(sock, key_file, cert_file)
            return httplib.FakeSocket(sock, sslobj)
    else:
        _ssl_wrap_socket = ssl.wrap_socket
class httpconnection(keepalive.HTTPConnection):
    '''keepalive HTTP connection that can tunnel HTTPS through a proxy.

    When realhost is set on the instance, connect() opens a plain socket
    to self.host:self.port, sends a CONNECT request for realhost:realport
    and, if the answer is 200, wraps the socket with SSL.  A non-200
    answer is parsed and kept in self.proxyres, which getresponse() then
    returns instead of reading from the socket.
    '''
    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection)

    def _proxytunnel(self):
        '''send CONNECT to the proxy; return True if the tunnel is up.

        On failure False is returned and, except for HTTP/0.9 replies, the
        parsed response is stored in self.proxyres.
        '''
        proxyheaders = dict(
                [(x, self.headers[x]) for x in self.headers
                 if x.lower().startswith('proxy-')])
        self._set_hostport(self.host, self.port)
        # realport may be an int (default 443) or the string taken from the
        # url, so format it with %s rather than %d
        self.send('CONNECT %s:%s HTTP/1.0\r\n' % (self.realhost, self.realport))
        for header in proxyheaders.iteritems():
            self.send('%s: %s\r\n' % header)
        self.send('\r\n')

        # majority of the following code is duplicated from
        # httplib.HTTPConnection as there are no adequate places to
        # override functions to provide the needed functionality
        res = self.response_class(self.sock,
                                  strict=self.strict,
                                  method=self._method)

        while True:
            version, status, reason = res._read_status()
            if status != httplib.CONTINUE:
                break
            # skip the header lines of the 100 Continue response
            while True:
                skip = res.fp.readline().strip()
                if not skip:
                    break
        res.status = status
        res.reason = reason.strip()

        if res.status == 200:
            # drain the response headers; also stop at EOF (readline()
            # returns '') so a proxy closing early cannot hang us here
            while True:
                line = res.fp.readline()
                if line in ('\r\n', '\n', ''):
                    break
            return True

        if version == 'HTTP/1.0':
            res.version = 10
        elif version.startswith('HTTP/1.'):
            res.version = 11
        elif version == 'HTTP/0.9':
            res.version = 9
        else:
            raise httplib.UnknownProtocol(version)

        if res.version == 9:
            res.length = None
            res.chunked = 0
            res.will_close = 1
            res.msg = httplib.HTTPMessage(cStringIO.StringIO())
            return False

        res.msg = httplib.HTTPMessage(res.fp)
        res.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        trenc = res.msg.getheader('transfer-encoding')
        if trenc and trenc.lower() == "chunked":
            res.chunked = 1
            res.chunk_left = None
        else:
            res.chunked = 0

        # will the connection close at the end of the response?
        res.will_close = res._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = res.msg.getheader('content-length')
        if length and not res.chunked:
            try:
                res.length = int(length)
            except ValueError:
                res.length = None
            else:
                if res.length < 0:  # ignore nonsensical negative lengths
                    res.length = None
        else:
            res.length = None

        # does the body have a fixed length? (of zero)
        if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
            100 <= status < 200 or # 1xx codes
            res._method == 'HEAD'):
            res.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not res.will_close and
            not res.chunked and
            res.length is None):
            res.will_close = 1

        self.proxyres = res

        return False

    def connect(self):
        '''connect directly, or through a CONNECT tunnel if realhost is set'''
        # realhost may never have been assigned on this instance, so read
        # it the same defensive way getresponse() reads proxyres
        if has_https and getattr(self, 'realhost', None): # use CONNECT proxy
            self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.sock.connect((self.host, self.port))
            if self._proxytunnel():
                # we do not support client x509 certificates
                self.sock = _ssl_wrap_socket(self.sock, None, None)
        else:
            keepalive.HTTPConnection.connect(self)

    def getresponse(self):
        '''return the pending proxy error response, if any, else the
        regular response'''
        proxyres = getattr(self, 'proxyres', None)
        if proxyres:
            if proxyres.will_close:
                self.close()
            self.proxyres = None
            return proxyres
        return keepalive.HTTPConnection.getresponse(self)
378
class httphandler(keepalive.HTTPHandler):
    '''keepalive HTTP handler that prepares connections for CONNECT
    tunnelling when an https url is requested through a proxy'''

    def http_open(self, req):
        return self.do_open(httpconnection, req)

    def _start_transaction(self, h, req):
        '''set realhost/realport/headers on connection h, then start the
        transaction.

        For an https url requested through a proxy, realhost and realport
        (an int, 443 by default) name the tunnel target and headers holds
        the request headers plus the opener's default headers.  Otherwise
        all three are set to None.
        '''
        if req.get_selector() == req.get_full_url(): # has proxy
            urlparts = urlparse.urlparse(req.get_selector())
            if urlparts[0] == 'https': # only use CONNECT for HTTPS
                netloc = urlparts[1]
                # the last ':' separates the port, unless the netloc ends
                # with a bracketed host that carries no port at all
                if ':' in netloc and not netloc.endswith(']'):
                    realhost, realport = netloc.rsplit(':', 1)
                    # the connection formats the port as a number
                    realport = int(realport)
                else:
                    realhost = netloc
                    realport = 443

                h.realhost = realhost
                h.realport = realport
                h.headers = req.headers.copy()
                h.headers.update(self.parent.addheaders)
                return keepalive.HTTPHandler._start_transaction(self, h, req)

        h.realhost = None
        h.realport = None
        h.headers = None
        return keepalive.HTTPHandler._start_transaction(self, h, req)

    def __del__(self):
        self.close_all()
258 406
259 has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        '''HTTPS connection using keepalive responses and a send() that
        can stream files'''
        # must be able to send big bundle as stream.
        send = _gen_sendfile(httplib.HTTPSConnection)
        response_class = keepalive.HTTPResponse

    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        '''keepalive handler that opens https urls with httpsconnection'''
        def https_open(self, req):
            connectionclass = httpsconnection
            return self.do_open(connectionclass, req)
269 416
270 417 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
271 418 # it doesn't know about the auth type requested. This can happen if
272 419 # somebody is using BasicAuth and types a bad password.
273 420 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
274 421 def http_error_auth_reqed(self, auth_header, host, req, headers):
275 422 try:
276 423 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
277 424 self, auth_header, host, req, headers)
278 425 except ValueError, inst:
279 426 arg = inst.args[0]
280 427 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
281 428 return
282 429 raise
283 430
def getauthinfo(path):
    '''split credentials out of a url.

    Returns (url, authinfo) where url has no user/passwd in it and
    authinfo is either None (no user in the url) or a
    (None, (url, netloc), user, passwd) tuple, with passwd defaulting
    to ''.
    '''
    scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
    urlpath = urlpath or '/'
    if scheme != 'file':
        # XXX: why are we quoting the path again with some smart
        # heuristic here? Anyway, it cannot be done with file://
        # urls since path encoding is os/fs dependent (see
        # urllib.pathname2url() for details).
        urlpath = quotepath(urlpath)
    host, port, user, passwd = netlocsplit(netloc)

    # urllib cannot handle URLs with embedded user or passwd
    barenetloc = netlocunsplit(host, port)
    url = urlparse.urlunsplit((scheme, barenetloc, urlpath, query, frag))
    if not user:
        return url, None

    hostport = host
    if port:
        hostport = hostport + ':' + port
    # Python < 2.4.3 uses only the netloc to search for a password
    return url, (None, (url, hostport), user, passwd or '')
308 455
def opener(ui, authinfo=None):
    '''
    build a urllib2 opener with mercurial's http(s), proxy and auth
    handlers installed

    authinfo, when given, is passed to the password manager's
    add_password()
    '''
    handlers = [httphandler()]
    if has_https:
        handlers.append(httpshandler())
    handlers.append(proxyhandler(ui))

    passmgr = passwordmgr(ui)
    if authinfo is not None:
        passmgr.add_password(*authinfo)
        user, passwd = authinfo[2:4]
        # never show the password itself in the debug output
        if passwd:
            shown = '*' * len(passwd)
        else:
            shown = 'not set'
        ui.debug(_('http auth: user %s, password %s\n') % (user, shown))

    handlers.append(urllib2.HTTPBasicAuthHandler(passmgr))
    handlers.append(httpdigestauthhandler(passmgr))
    director = urllib2.build_opener(*handlers)

    # 1.0 here is the _protocol_ version
    director.addheaders = [
        ('User-agent', 'mercurial/proto-1.0'),
        ('Accept', 'application/mercurial-0.1'),
    ]
    return director
335 482
# matches a leading "scheme://".  The '-' is last in the character class so
# that it is a literal: written as '+-.' it formed a range that also let
# ',' through.
scheme_re = re.compile(r'^([a-zA-Z0-9+.-]+)://')

def open(ui, url, data=None):
    '''open url (or a local path) and return the response object.

    A url without a scheme is treated as a local path and turned into an
    absolute file:// url.  Otherwise credentials embedded in the url are
    split off with getauthinfo() and handed to the opener.  data is passed
    through to the opener's open().
    '''
    scheme = None
    m = scheme_re.search(url)
    if m:
        scheme = m.group(1).lower()
    if not scheme:
        path = util.normpath(os.path.abspath(url))
        url = 'file://' + urllib.pathname2url(path)
        authinfo = None
    else:
        url, authinfo = getauthinfo(url)
    return opener(ui, authinfo).open(url, data)
General Comments 0
You need to be logged in to leave comments. Login now