##// END OF EJS Templates
url: provide url object...
Brodie Rao -
r13770:4e8f2310 default
parent child Browse files
Show More
@@ -1,762 +1,954 b''
1 1 # url.py - HTTP handling for mercurial
2 2 #
3 3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import urllib, urllib2, urlparse, httplib, os, re, socket, cStringIO
11 11 import __builtin__
12 12 from i18n import _
13 13 import keepalive, util
14 14
15 15 def _urlunparse(scheme, netloc, path, params, query, fragment, url):
16 16 '''Handle cases where urlunparse(urlparse(x://)) doesn't preserve the "//"'''
17 17 result = urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
18 18 if (scheme and
19 19 result.startswith(scheme + ':') and
20 20 not result.startswith(scheme + '://') and
21 21 url.startswith(scheme + '://')
22 22 ):
23 23 result = scheme + '://' + result[len(scheme + ':'):]
24 24 return result
25 25
26 class url(object):
27 """Reliable URL parser.
28
29 This parses URLs and provides attributes for the following
30 components:
31
32 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
33
34 Missing components are set to None. The only exception is
35 fragment, which is set to '' if present but empty.
36
37 If parse_fragment is False, fragment is included in query. If
38 parse_query is False, query is included in path. If both are
39 False, both fragment and query are included in path.
40
41 See http://www.ietf.org/rfc/rfc2396.txt for more information.
42
43 Examples:
44
45 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
46 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
47 >>> url('ssh://[::1]:2200//home/joe/repo')
48 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
49 >>> url('file:///home/joe/repo')
50 <url scheme: 'file', path: '/home/joe/repo'>
51 >>> url('bundle:foo')
52 <url scheme: 'bundle', path: 'foo'>
53
54 Authentication credentials:
55
56 >>> url('ssh://joe:xyz@x/repo')
57 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
58 >>> url('ssh://joe@x/repo')
59 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
60
61 Query strings and fragments:
62
63 >>> url('http://host/a?b#c')
64 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
65 >>> url('http://host/a?b#c', parse_query=False, parse_fragment=False)
66 <url scheme: 'http', host: 'host', path: 'a?b#c'>
67 """
68
69 _safechars = "!~*'()+"
70 _safepchars = "/!~*'()+"
71
72 def __init__(self, path, parse_query=True, parse_fragment=True):
73 # We slowly chomp away at path until we have only the path left
74 self.scheme = self.user = self.passwd = self.host = None
75 self.port = self.path = self.query = self.fragment = None
76 self._localpath = True
77
78 if not path.startswith('/') and ':' in path:
79 parts = path.split(':', 1)
80 if parts[0]:
81 self.scheme, path = parts
82 self._localpath = False
83
84 if not path:
85 path = None
86 if self._localpath:
87 self.path = ''
88 return
89 else:
90 if parse_fragment and '#' in path:
91 path, self.fragment = path.split('#', 1)
92 if not path:
93 path = None
94 if self._localpath:
95 self.path = path
96 return
97
98 if parse_query and '?' in path:
99 path, self.query = path.split('?', 1)
100 if not path:
101 path = None
102 if not self.query:
103 self.query = None
104
105 # // is required to specify a host/authority
106 if path and path.startswith('//'):
107 parts = path[2:].split('/', 1)
108 if len(parts) > 1:
109 self.host, path = parts
110 path = path
111 else:
112 self.host = parts[0]
113 path = None
114 if not self.host:
115 self.host = None
116 if path:
117 path = '/' + path
118
119 if self.host and '@' in self.host:
120 self.user, self.host = self.host.rsplit('@', 1)
121 if ':' in self.user:
122 self.user, self.passwd = self.user.split(':', 1)
123 if not self.host:
124 self.host = None
125
126 # Don't split on colons in IPv6 addresses without ports
127 if (self.host and ':' in self.host and
128 not (self.host.startswith('[') and self.host.endswith(']'))):
129 self.host, self.port = self.host.rsplit(':', 1)
130 if not self.host:
131 self.host = None
132 self.path = path
133
134 for a in ('user', 'passwd', 'host', 'port',
135 'path', 'query', 'fragment'):
136 v = getattr(self, a)
137 if v is not None:
138 setattr(self, a, urllib.unquote(v))
139
140 def __repr__(self):
141 attrs = []
142 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
143 'query', 'fragment'):
144 v = getattr(self, a)
145 if v is not None:
146 attrs.append('%s: %r' % (a, v))
147 return '<url %s>' % ', '.join(attrs)
148
149 def __str__(self):
150 """Join the URL's components back into a URL string.
151
152 Examples:
153
154 >>> str(url('http://user:pw@host:80/?foo#bar'))
155 'http://user:pw@host:80/?foo#bar'
156 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
157 'ssh://user:pw@[::1]:2200//home/joe#'
158 >>> str(url('http://localhost:80//'))
159 'http://localhost:80//'
160 >>> str(url('http://localhost:80/'))
161 'http://localhost:80/'
162 >>> str(url('http://localhost:80'))
163 'http://localhost:80'
164 >>> str(url('bundle:foo'))
165 'bundle:foo'
166 >>> str(url('path'))
167 'path'
168 """
169 if self._localpath:
170 s = self.path
171 if self.fragment:
172 s += '#' + self.fragment
173 return s
174
175 s = self.scheme + ':'
176 if (self.user or self.passwd or self.host or
177 self.scheme and not self.path):
178 s += '//'
179 if self.user:
180 s += urllib.quote(self.user, safe=self._safechars)
181 if self.passwd:
182 s += ':' + urllib.quote(self.passwd, safe=self._safechars)
183 if self.user or self.passwd:
184 s += '@'
185 if self.host:
186 if not (self.host.startswith('[') and self.host.endswith(']')):
187 s += urllib.quote(self.host)
188 else:
189 s += self.host
190 if self.port:
191 s += ':' + urllib.quote(self.port)
192 if ((self.host and self.path is not None) or
193 (self.host and self.query or self.fragment)):
194 s += '/'
195 if self.path:
196 s += urllib.quote(self.path, safe=self._safepchars)
197 if self.query:
198 s += '?' + urllib.quote(self.query, safe=self._safepchars)
199 if self.fragment is not None:
200 s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
201 return s
202
203 def authinfo(self):
204 user, passwd = self.user, self.passwd
205 try:
206 self.user, self.passwd = None, None
207 s = str(self)
208 finally:
209 self.user, self.passwd = user, passwd
210 if not self.user:
211 return (s, None)
212 return (s, (None, (str(self), self.host),
213 self.user, self.passwd or ''))
214
def has_scheme(path):
    '''Return True when path parses with a non-empty URL scheme.'''
    parsed = url(path)
    return bool(parsed.scheme)
217
def hidepassword(url):
    '''hide user credential in a url string'''
    parts = urlparse.urlparse(url)
    scheme, netloc, path, params, query, fragment = parts
    # keep the user name and host, replace the password with ***
    masked = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc)
    return _urlunparse(scheme, masked, path, params, query, fragment, url)
31 223
def removeauth(url):
    '''remove all authentication information from a url string'''
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    # find() gives -1 when there is no '@', so the slice then starts at 0
    # and the netloc is kept whole.
    start = netloc.find('@') + 1
    return _urlunparse(scheme, netloc[start:], path, params, query,
                       fragment, url)
37 229
38 230 def netlocsplit(netloc):
39 231 '''split [user[:passwd]@]host[:port] into 4-tuple.'''
40 232
41 233 a = netloc.find('@')
42 234 if a == -1:
43 235 user, passwd = None, None
44 236 else:
45 237 userpass, netloc = netloc[:a], netloc[a + 1:]
46 238 c = userpass.find(':')
47 239 if c == -1:
48 240 user, passwd = urllib.unquote(userpass), None
49 241 else:
50 242 user = urllib.unquote(userpass[:c])
51 243 passwd = urllib.unquote(userpass[c + 1:])
52 244 c = netloc.find(':')
53 245 if c == -1:
54 246 host, port = netloc, None
55 247 else:
56 248 host, port = netloc[:c], netloc[c + 1:]
57 249 return host, port, user, passwd
58 250
59 251 def netlocunsplit(host, port, user=None, passwd=None):
60 252 '''turn host, port, user, passwd into [user[:passwd]@]host[:port].'''
61 253 if port:
62 254 hostport = host + ':' + port
63 255 else:
64 256 hostport = host
65 257 if user:
66 258 quote = lambda s: urllib.quote(s, safe='')
67 259 if passwd:
68 260 userpass = quote(user) + ':' + quote(passwd)
69 261 else:
70 262 userpass = quote(user)
71 263 return userpass + '@' + hostport
72 264 return hostport
73 265
def readauthforuri(ui, uri):
    '''Return the (group, settings) pair of the [auth] group matching uri.

    Settings are read from "<group>.<setting>" keys of the [auth]
    section. Among the groups whose prefix matches uri and whose schemes
    include the uri scheme, the one with the longest prefix wins. None is
    returned when no group matches.
    '''
    # Read configuration
    groups = {}
    for key, val in ui.configitems('auth'):
        if '.' not in key:
            ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
            continue
        group, setting = key.rsplit('.', 1)
        settings = groups.setdefault(group, {})
        if setting in ('username', 'cert', 'key'):
            val = util.expandpath(val)
        settings[setting] = val

    # Find the best match
    scheme, hostpath = uri.split('://', 1)
    best = None
    bestlen = 0
    for group, auth in groups.iteritems():
        prefix = auth.get('prefix')
        if not prefix:
            continue
        pieces = prefix.split('://', 1)
        if len(pieces) > 1:
            # a scheme given in the prefix overrides the schemes setting
            schemes, prefix = [pieces[0]], pieces[1]
        else:
            schemes = (auth.get('schemes') or 'https').split()
        if scheme not in schemes or len(prefix) <= bestlen:
            continue
        if prefix == '*' or hostpath.startswith(prefix):
            bestlen = len(prefix)
            best = group, auth
    return best
105 297
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
# both sets are built on the first quotepath() call
_safeset = None
_hex = None
def quotepath(path):
    '''quote the path part of a URL

    This is similar to urllib.quote, but it also tries to avoid
    quoting things twice (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%20')
    'abc%20def%20'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'
    '''
    global _safeset, _hex
    if _safeset is None:
        _safeset = set(_safe)
        _hex = set('abcdefABCDEF0123456789')
    size = len(path)
    quoted = []
    for pos, char in enumerate(path):
        if char in _safeset:
            quoted.append(char)
        elif (char == '%' and pos + 2 < size and
              path[pos + 1] in _hex and path[pos + 2] in _hex):
            # looks like an existing %XX escape: leave it alone
            quoted.append(char)
        else:
            quoted.append('%%%02X' % ord(char))
    return ''.join(quoted)
143 335
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    '''Password manager that falls back on [auth] settings and prompting.'''

    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        '''Return (user, passwd) for authuri.

        Credentials already known to the manager are used first, then the
        matching [auth] group, then an interactive prompt. Whatever was
        missing and had to be looked up is stored for later requests.
        Aborts when prompting is needed but the ui is not interactive.
        '''
        base = urllib2.HTTPPasswordMgrWithDefaultRealm
        user, passwd = base.find_user_password(self, realm, authuri)
        if user and passwd:
            self._writedebug(user, passwd)
            return (user, passwd)

        if not user:
            match = readauthforuri(self.ui, authuri)
            if match:
                group, auth = match
                user = auth.get('username')
                passwd = auth.get('password')
                self.ui.debug("using auth.%s.* for authentication\n" % group)

        if not (user and passwd):
            if not self.ui.interactive():
                raise util.Abort(_('http authorization required'))

            self.ui.write(_("http authorization required\n"))
            self.ui.write(_("realm: %s\n") % realm)
            if not user:
                user = self.ui.prompt(_("user:"), default=None)
            else:
                self.ui.write(_("user: %s\n") % user)

            if not passwd:
                passwd = self.ui.getpass()

        self.add_password(realm, authuri, user, passwd)
        self._writedebug(user, passwd)
        return (user, passwd)

    def _writedebug(self, user, passwd):
        # the password is shown as a run of '*' of the same length
        shown = passwd and '*' * len(passwd) or 'not set'
        msg = _('http auth: user %s, password %s\n')
        self.ui.debug(msg % (user, shown))
184 376
class proxyhandler(urllib2.ProxyHandler):
    '''Proxy handler configured from [http_proxy] or $http_proxy.'''

    def __init__(self, ui):
        proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
        # XXX proxyauthinfo = None

        proxies = {}
        if proxyurl:
            # proxy can be proper url or host[:port]
            isurl = (proxyurl.startswith('http:') or
                     proxyurl.startswith('https:'))
            if not isurl:
                proxyurl = 'http://' + proxyurl + '/'
            scheme, netloc, path, query, frag = urlparse.urlsplit(proxyurl)
            host, port, user, passwd = netlocsplit(netloc)
            if not user:
                user = ui.config("http_proxy", "user")
                passwd = ui.config("http_proxy", "passwd")

            # see if we should use a proxy for this url
            bypass = ["localhost", "127.0.0.1"]
            for entry in ui.configlist("http_proxy", "no"):
                bypass.append(entry.lower())
            for entry in os.getenv("no_proxy", '').split(','):
                entry = entry.strip()
                if entry:
                    bypass.append(entry.lower())
            # "http_proxy.always" config is for running tests on localhost
            if ui.configbool("http_proxy", "always"):
                bypass = []
            self.no_list = bypass

            proxyurl = urlparse.urlunsplit(
                (scheme, netlocunsplit(host, port, user, passwd or ''),
                 path, query, frag))
            proxies = {'http': proxyurl, 'https': proxyurl}
            ui.debug('proxying through http://%s:%s\n' %
                     (host, port))

        # urllib2 takes proxy values from the environment and those
        # will take precedence if found, so drop them
        for env in ("HTTP_PROXY", "http_proxy", "no_proxy"):
            try:
                if env in os.environ:
                    del os.environ[env]
            except OSError:
                pass

        urllib2.ProxyHandler.__init__(self, proxies)
        self.ui = ui

    def proxy_open(self, req, proxy, type_):
        '''Open req through the proxy unless its host is on the no-list.'''
        hostname = req.get_host().split(':')[0]
        if hostname in self.no_list:
            return None

        # work around a bug in Python < 2.4.2
        # (it leaves a "\n" at the end of Proxy-authorization headers)
        parent = req.__class__
        class _request(parent):
            def add_header(self, key, val):
                if key.lower() == 'proxy-authorization':
                    val = val.strip()
                return parent.add_header(self, key, val)
        req.__class__ = _request

        return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
255 447
class httpsendfile(object):
    """This is a wrapper around the objects returned by python's "open".

    Its purpose is to send file-like objects via HTTP and, to do so, it
    defines a __len__ attribute to feed the Content-Length header.
    """

    def __init__(self, ui, *args, **kwargs):
        # We can't just "self._data = open(*args, **kwargs)" here because there
        # is an "open" function defined in this module that shadows the global
        # one
        self.ui = ui
        self._data = __builtin__.open(*args, **kwargs)
        self.seek = self._data.seek
        self.close = self._data.close
        self.write = self._data.write
        self._len = os.fstat(self._data.fileno()).st_size
        # bytes handed out by read() so far
        self._pos = 0
        # progress total in kb, doubled (see the comment in read())
        self._total = len(self) / 1024 * 2

    def read(self, *args, **kwargs):
        """Read from the wrapped file and report progress.

        Takes the same arguments as the wrapped file's read(). When no
        more data is available (an empty read, or EOFError from the
        underlying object) the 'sending' progress topic is closed and
        the empty result is returned.
        """
        try:
            ret = self._data.read(*args, **kwargs)
        except EOFError:
            # treat as end of data; ret must be bound for the code below
            ret = ''
        if not ret:
            self.ui.progress(_('sending'), None)
            return ret
        self._pos += len(ret)
        # We pass double the max for total because we currently have
        # to send the bundle twice in the case of a server that
        # requires authentication. Since we can't know until we try
        # once whether authentication will be required, just lie to
        # the user and maybe the push succeeds suddenly at 50%.
        self.ui.progress(_('sending'), self._pos / 1024,
                         unit=_('kb'), total=self._total)
        return ret

    def __len__(self):
        return self._len
293 485
def _gen_sendfile(orgsend):
    '''Wrap a send method so that httpsendfile objects are sent in chunks.

    Anything that is not an httpsendfile is passed to orgsend unchanged.
    '''
    def _sendfile(self, data):
        if not isinstance(data, httpsendfile):
            orgsend(self, data)
            return
        # send a file
        # if auth required, some data sent twice, so rewind here
        data.seek(0)
        for chunk in util.filechunkiter(data):
            orgsend(self, chunk)
    return _sendfile
305 497
306 498 has_https = hasattr(urllib2, 'HTTPSHandler')
307 499 if has_https:
308 500 try:
309 501 # avoid using deprecated/broken FakeSocket in python 2.6
310 502 import ssl
311 503 _ssl_wrap_socket = ssl.wrap_socket
312 504 CERT_REQUIRED = ssl.CERT_REQUIRED
313 505 except ImportError:
314 506 CERT_REQUIRED = 2
315 507
316 508 def _ssl_wrap_socket(sock, key_file, cert_file,
317 509 cert_reqs=CERT_REQUIRED, ca_certs=None):
318 510 if ca_certs:
319 511 raise util.Abort(_(
320 512 'certificate checking requires Python 2.6'))
321 513
322 514 ssl = socket.ssl(sock, key_file, cert_file)
323 515 return httplib.FakeSocket(sock, ssl)
324 516
325 517 try:
326 518 _create_connection = socket.create_connection
327 519 except AttributeError:
328 520 _GLOBAL_DEFAULT_TIMEOUT = object()
329 521
330 522 def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT,
331 523 source_address=None):
332 524 # lifted from Python 2.6
333 525
334 526 msg = "getaddrinfo returns an empty list"
335 527 host, port = address
336 528 for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
337 529 af, socktype, proto, canonname, sa = res
338 530 sock = None
339 531 try:
340 532 sock = socket.socket(af, socktype, proto)
341 533 if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
342 534 sock.settimeout(timeout)
343 535 if source_address:
344 536 sock.bind(source_address)
345 537 sock.connect(sa)
346 538 return sock
347 539
348 540 except socket.error, msg:
349 541 if sock is not None:
350 542 sock.close()
351 543
352 544 raise socket.error, msg
353 545
class httpconnection(keepalive.HTTPConnection):
    '''Keepalive HTTP connection that can stream files and use CONNECT.'''

    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection.send)

    def connect(self):
        if not (has_https and self.realhostport):
            keepalive.HTTPConnection.connect(self)
            return
        # use CONNECT proxy
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.connect((self.host, self.port))
        if _generic_proxytunnel(self):
            # we do not support client x509 certificates
            self.sock = _ssl_wrap_socket(self.sock, None, None)

    def getresponse(self):
        # a response kept by the proxy tunnel setup is handed out first
        proxyres = getattr(self, 'proxyres', None)
        if not proxyres:
            return keepalive.HTTPConnection.getresponse(self)
        if proxyres.will_close:
            self.close()
        self.proxyres = None
        return proxyres
376 568
377 569 # general transaction handler to support different ways to handle
378 570 # HTTPS proxying before and after Python 2.6.3.
def _generic_start_transaction(handler, h, req):
    '''Set h.realhostport and h.headers for a request tunneled via CONNECT.

    Both attributes are set to None when the request is not an HTTPS
    request going through a proxy.
    '''
    new_tunnel = bool(hasattr(req, '_tunnel_host') and req._tunnel_host)
    if new_tunnel:
        tunnel_host = req._tunnel_host
        if tunnel_host[:7] not in ('http://', 'https:/'):
            tunnel_host = 'https://' + tunnel_host
    else:
        tunnel_host = req.get_selector()

    viaproxy = new_tunnel or tunnel_host == req.get_full_url() # has proxy
    if viaproxy:
        urlparts = urlparse.urlparse(tunnel_host)
        # only use CONNECT for HTTPS
        if new_tunnel or urlparts[0] == 'https':
            hostport = urlparts[1]
            # no port given (a trailing ']' is a bare IPv6 literal)
            if hostport[-1] == ']' or ':' not in hostport:
                hostport += ':443'

            h.realhostport = hostport
            h.headers = req.headers.copy()
            h.headers.update(handler.parent.addheaders)
            return

    h.realhostport = None
    h.headers = None
403 595
def _generic_proxytunnel(self):
    '''Ask the proxy to open a CONNECT tunnel to self.realhostport.

    Returns True when the proxy answered 200 and the tunnel is ready.
    Otherwise returns False; for replies other than HTTP/0.9 the parsed
    proxy response is stored in self.proxyres.
    Raises httplib.UnknownProtocol for an unrecognized HTTP version.
    '''
    proxyheaders = dict(
            [(x, self.headers[x]) for x in self.headers
             if x.lower().startswith('proxy-')])
    self._set_hostport(self.host, self.port)
    self.send('CONNECT %s HTTP/1.0\r\n' % self.realhostport)
    for header in proxyheaders.iteritems():
        self.send('%s: %s\r\n' % header)
    self.send('\r\n')

    # majority of the following code is duplicated from
    # httplib.HTTPConnection as there are no adequate places to
    # override functions to provide the needed functionality
    res = self.response_class(self.sock,
                              strict=self.strict,
                              method=self._method)

    while True:
        version, status, reason = res._read_status()
        if status != httplib.CONTINUE:
            break
        # skip the header block of the 100 Continue reply
        while True:
            skip = res.fp.readline().strip()
            if not skip:
                break
    res.status = status
    res.reason = reason.strip()

    if res.status == 200:
        # Discard the reply headers. An empty read means the proxy closed
        # the connection; stop then too instead of looping forever.
        while True:
            line = res.fp.readline()
            if line in ('\r\n', '\n', ''):
                break
        return True

    if version == 'HTTP/1.0':
        res.version = 10
    elif version.startswith('HTTP/1.'):
        res.version = 11
    elif version == 'HTTP/0.9':
        res.version = 9
    else:
        raise httplib.UnknownProtocol(version)

    if res.version == 9:
        res.length = None
        res.chunked = 0
        res.will_close = 1
        res.msg = httplib.HTTPMessage(cStringIO.StringIO())
        return False

    res.msg = httplib.HTTPMessage(res.fp)
    res.msg.fp = None

    # are we using the chunked-style of transfer encoding?
    trenc = res.msg.getheader('transfer-encoding')
    if trenc and trenc.lower() == "chunked":
        res.chunked = 1
        res.chunk_left = None
    else:
        res.chunked = 0

    # will the connection close at the end of the response?
    res.will_close = res._check_close()

    # do we have a Content-Length?
    # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
    length = res.msg.getheader('content-length')
    if length and not res.chunked:
        try:
            res.length = int(length)
        except ValueError:
            res.length = None
        else:
            if res.length < 0:  # ignore nonsensical negative lengths
                res.length = None
    else:
        res.length = None

    # does the body have a fixed length? (of zero)
    if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
        100 <= status < 200 or # 1xx codes
        res._method == 'HEAD'):
        res.length = 0

    # if the connection remains open, and we aren't using chunked, and
    # a content-length was not provided, then assume that the connection
    # WILL close.
    if (not res.will_close and
       not res.chunked and
       res.length is None):
        res.will_close = 1

    self.proxyres = res

    return False
500 692
class httphandler(keepalive.HTTPHandler):
    '''Keepalive HTTP handler that opens httpconnection objects.'''

    def http_open(self, req):
        return self.do_open(httpconnection, req)

    def _start_transaction(self, h, req):
        # record the tunnel target on the connection before the request
        _generic_start_transaction(self, h, req)
        parent = keepalive.HTTPHandler
        return parent._start_transaction(self, h, req)
508 700
509 701 def _verifycert(cert, hostname):
510 702 '''Verify that cert (in socket.getpeercert() format) matches hostname.
511 703 CRLs is not handled.
512 704
513 705 Returns error message if any problems are found and None on success.
514 706 '''
515 707 if not cert:
516 708 return _('no certificate received')
517 709 dnsname = hostname.lower()
518 710 def matchdnsname(certname):
519 711 return (certname == dnsname or
520 712 '.' in dnsname and certname == '*.' + dnsname.split('.', 1)[1])
521 713
522 714 san = cert.get('subjectAltName', [])
523 715 if san:
524 716 certnames = [value.lower() for key, value in san if key == 'DNS']
525 717 for name in certnames:
526 718 if matchdnsname(name):
527 719 return None
528 720 return _('certificate is for %s') % ', '.join(certnames)
529 721
530 722 # subject is only checked when subjectAltName is empty
531 723 for s in cert.get('subject', []):
532 724 key, value = s[0]
533 725 if key == 'commonName':
534 726 try:
535 727 # 'subject' entries are unicode
536 728 certname = value.lower().encode('ascii')
537 729 except UnicodeEncodeError:
538 730 return _('IDN in certificate not supported')
539 731 if matchdnsname(certname):
540 732 return None
541 733 return _('certificate is for %s') % certname
542 734 return _('no commonName or subjectAltName found in certificate')
543 735
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        '''HTTPS connection that checks the server certificate.'''

        response_class = keepalive.HTTPResponse
        # must be able to send big bundle as stream.
        send = _gen_sendfile(keepalive.safesend)
        getresponse = keepalive.wrapgetresponse(httplib.HTTPSConnection)

        def connect(self):
            self.sock = _create_connection((self.host, self.port))

            host = self.host
            if self.realhostport: # use CONNECT proxy
                _generic_proxytunnel(self)
                # certificate checks apply to the tunnel target
                host = self.realhostport.rsplit(':', 1)[0]

            cacerts = self.ui.config('web', 'cacerts')
            hostfingerprint = self.ui.config('hostfingerprints', host)

            if cacerts and not hostfingerprint:
                # verify against the configured CA certificates
                cacerts = util.expandpath(cacerts)
                if not os.path.exists(cacerts):
                    raise util.Abort(_('could not find '
                                       'web.cacerts: %s') % cacerts)
                self.sock = _ssl_wrap_socket(self.sock, self.key_file,
                                             self.cert_file,
                                             cert_reqs=CERT_REQUIRED,
                                             ca_certs=cacerts)
                problem = _verifycert(self.sock.getpeercert(), host)
                if problem:
                    raise util.Abort(_('%s certificate error: %s '
                                       '(use --insecure to connect '
                                       'insecurely)') % (host, problem))
                self.ui.debug('%s certificate successfully verified\n' % host)
                return

            self.sock = _ssl_wrap_socket(self.sock, self.key_file,
                                         self.cert_file)
            if not hasattr(self.sock, 'getpeercert'): # python 2.5 ?
                if hostfingerprint:
                    raise util.Abort(_('no certificate for %s with '
                                       'configured hostfingerprint') % host)
                self.ui.warn(_('warning: %s certificate not verified '
                               '(check web.cacerts config setting)\n') %
                             host)
                return

            peercert = self.sock.getpeercert(True)
            peerfingerprint = util.sha1(peercert).hexdigest()
            pairs = [peerfingerprint[x:x + 2]
                     for x in xrange(0, len(peerfingerprint), 2)]
            nicefingerprint = ":".join(pairs)
            if not hostfingerprint:
                self.ui.warn(_('warning: %s certificate '
                               'with fingerprint %s not verified '
                               '(check hostfingerprints or web.cacerts '
                               'config setting)\n') %
                             (host, nicefingerprint))
                return

            expected = hostfingerprint.replace(':', '').lower()
            if peerfingerprint.lower() != expected:
                raise util.Abort(_('invalid certificate for %s '
                                   'with fingerprint %s') %
                                 (host, nicefingerprint))
            self.ui.debug('%s certificate matched fingerprint %s\n' %
                          (host, nicefingerprint))

    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        '''HTTPS handler that creates httpsconnection objects.'''

        def __init__(self, ui):
            keepalive.KeepAliveHandler.__init__(self)
            urllib2.HTTPSHandler.__init__(self)
            self.ui = ui
            self.pwmgr = passwordmgr(self.ui)

        def _start_transaction(self, h, req):
            _generic_start_transaction(self, h, req)
            parent = keepalive.KeepAliveHandler
            return parent._start_transaction(self, h, req)

        def https_open(self, req):
            # remember the matching [auth] group for _makeconnection
            found = readauthforuri(self.ui, req.get_full_url())
            if not found:
                self.auth = None
            else:
                group, self.auth = found
                self.ui.debug("using auth.%s.* for authentication\n" % group)
            return self.do_open(self._makeconnection, req)

        def _makeconnection(self, host, port=None, *args, **kwargs):
            keyfile = certfile = None
            if args: # key_file
                keyfile = args[0]
            if len(args) >= 2: # cert_file
                certfile = args[1]
            args = args[2:]

            # if the user has specified different key/cert files in
            # hgrc, we prefer these
            auth = self.auth
            if auth and 'key' in auth and 'cert' in auth:
                keyfile = auth['key']
                certfile = auth['cert']

            conn = httpsconnection(host, port, keyfile, certfile, *args,
                                   **kwargs)
            conn.ui = self.ui
            return conn
646 838
647 839 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
648 840 def __init__(self, *args, **kwargs):
649 841 urllib2.HTTPDigestAuthHandler.__init__(self, *args, **kwargs)
650 842 self.retried_req = None
651 843
652 844 def reset_retry_count(self):
653 845 # Python 2.6.5 will call this on 401 or 407 errors and thus loop
654 846 # forever. We disable reset_retry_count completely and reset in
655 847 # http_error_auth_reqed instead.
656 848 pass
657 849
658 850 def http_error_auth_reqed(self, auth_header, host, req, headers):
659 851 # Reset the retry counter once for each request.
660 852 if req is not self.retried_req:
661 853 self.retried_req = req
662 854 self.retried = 0
663 855 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
664 856 # it doesn't know about the auth type requested. This can happen if
665 857 # somebody is using BasicAuth and types a bad password.
666 858 try:
667 859 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
668 860 self, auth_header, host, req, headers)
669 861 except ValueError, inst:
670 862 arg = inst.args[0]
671 863 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
672 864 return
673 865 raise
674 866
675 867 class httpbasicauthhandler(urllib2.HTTPBasicAuthHandler):
676 868 def __init__(self, *args, **kwargs):
677 869 urllib2.HTTPBasicAuthHandler.__init__(self, *args, **kwargs)
678 870 self.retried_req = None
679 871
680 872 def reset_retry_count(self):
681 873 # Python 2.6.5 will call this on 401 or 407 errors and thus loop
682 874 # forever. We disable reset_retry_count completely and reset in
683 875 # http_error_auth_reqed instead.
684 876 pass
685 877
686 878 def http_error_auth_reqed(self, auth_header, host, req, headers):
687 879 # Reset the retry counter once for each request.
688 880 if req is not self.retried_req:
689 881 self.retried_req = req
690 882 self.retried = 0
691 883 return urllib2.HTTPBasicAuthHandler.http_error_auth_reqed(
692 884 self, auth_header, host, req, headers)
693 885
def getauthinfo(path):
    '''Split the credentials out of a URL string.

    Returns (url, authinfo): url is path rebuilt without user and
    password, authinfo is None when path names no user and otherwise
    (None, (url, netloc), user, passwd) with passwd defaulting to ''.
    '''
    scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
    urlpath = urlpath or '/'
    if scheme != 'file':
        # XXX: why are we quoting the path again with some smart
        # heuristic here? Anyway, it cannot be done with file://
        # urls since path encoding is os/fs dependent (see
        # urllib.pathname2url() for details).
        urlpath = quotepath(urlpath)
    host, port, user, passwd = netlocsplit(netloc)

    # urllib cannot handle URLs with embedded user or passwd
    hostport = netlocunsplit(host, port)
    url = urlparse.urlunsplit((scheme, hostport, urlpath, query, frag))
    if not user:
        return url, None
    # Python < 2.4.3 uses only the netloc to search for a password
    return url, (None, (url, hostport), user, passwd or '')
718 910
# callables invoked as h(ui, passmgr) to supply extra urllib2 handlers
handlerfuncs = []

def opener(ui, authinfo=None):
    '''
    construct an opener suitable for urllib2
    authinfo will be added to the password manager
    '''
    handlers = [httphandler()]
    if has_https:
        handlers.append(httpshandler(ui))
    handlers.append(proxyhandler(ui))

    passmgr = passwordmgr(ui)
    if authinfo is not None:
        passmgr.add_password(*authinfo)
        user, passwd = authinfo[2:4]
        shown = passwd and '*' * len(passwd) or 'not set'
        ui.debug('http auth: user %s, password %s\n' %
                 (user, shown))

    handlers.append(httpbasicauthhandler(passmgr))
    handlers.append(httpdigestauthhandler(passmgr))
    for makehandler in handlerfuncs:
        handlers.append(makehandler(ui, passmgr))
    opener = urllib2.build_opener(*handlers)

    # 1.0 here is the _protocol_ version
    opener.addheaders = [
        ('User-agent', 'mercurial/proto-1.0'),
        ('Accept', 'application/mercurial-0.1'),
    ]
    return opener
748 940
# Matches a leading "<scheme>://". The '-' is placed last in the class so
# it is a literal; written as '+-.' it forms a range that also admits ','.
scheme_re = re.compile(r'^([a-zA-Z0-9+.-]+)://')
750 942
def open(ui, url, data=None):
    '''Open url (a URL or a local path) and return the response object.

    An argument without a "<scheme>://" prefix is treated as a local
    path and opened through a file:// URL.
    '''
    found = scheme_re.search(url)
    if found and found.group(1).lower():
        url, authinfo = getauthinfo(url)
    else:
        localpath = util.normpath(os.path.abspath(url))
        url = 'file://' + urllib.pathname2url(localpath)
        authinfo = None
    return opener(ui, authinfo).open(url, data)
@@ -1,54 +1,190 b''
1 1 import sys
2 2
def check(a, b):
    """Print the pair (a, b) when a and b compare as different."""
    mismatch = a != b
    if mismatch:
        print (a, b)
6 6
def cert(cn):
    """Return a certificate dict whose subject holds only commonName cn."""
    rdn = (('commonName', cn),)
    return {'subject': (rdn,)}
9 9
10 10 from mercurial.url import _verifycert
11 11
# The subject names example.com while subjectAltName lists only the
# example.net names.
san_cert = {'subject': ((('commonName', 'example.com'),),),
            'subjectAltName': (('DNS', '*.example.net'),
                               ('DNS', 'example.net'))}

# Rows of (certificate, hostname, expected _verifycert() result).  The
# rows are verified in order and any mismatch is reported by check().
verifycases = [
    # Test non-wildcard certificates
    (cert('example.com'), 'example.com', None),
    (cert('example.com'), 'www.example.com',
     'certificate is for example.com'),
    (cert('www.example.com'), 'example.com',
     'certificate is for www.example.com'),

    # Test wildcard certificates
    (cert('*.example.com'), 'www.example.com', None),
    (cert('*.example.com'), 'example.com',
     'certificate is for *.example.com'),
    (cert('*.example.com'), 'w.w.example.com',
     'certificate is for *.example.com'),

    # Test subjectAltName
    (san_cert, 'example.net', None),
    (san_cert, 'foo.example.net', None),
    # subject is only checked when subjectAltName is empty
    (san_cert, 'example.com',
     'certificate is for *.example.net, example.net'),

    # Avoid some pitfalls
    (cert('*.foo'), 'foo', 'certificate is for *.foo'),
    (cert('*o'), 'foo', 'certificate is for *o'),

    ({'subject': ()}, 'example.com',
     'no commonName or subjectAltName found in certificate'),
    (None, 'example.com', 'no certificate received'),
]

for certificate, hostname, expected in verifycases:
    check(_verifycert(certificate, hostname), expected)
51 51
52 import doctest
53
def test_url():
    """
    >>> from mercurial.url import url

    These doctests exercise edge cases in the parsing algorithm of the
    url class. Most of them aren't useful for documentation purposes,
    so they aren't part of the class's own doc tests.

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?')
    <url scheme: 'http', host: 'host', path: 'a'>
    >>> url('http://host/a#b#c')
    <url scheme: 'http', host: 'host', path: 'a', fragment: 'b#c'>
    >>> url('http://host/a#b?c')
    <url scheme: 'http', host: 'host', path: 'a', fragment: 'b?c'>
    >>> url('http://host/?a#b')
    <url scheme: 'http', host: 'host', path: '', query: 'a', fragment: 'b'>
    >>> url('http://host/?a#b', parse_query=False)
    <url scheme: 'http', host: 'host', path: '?a', fragment: 'b'>
    >>> url('http://host/?a#b', parse_fragment=False)
    <url scheme: 'http', host: 'host', path: '', query: 'a#b'>
    >>> url('http://host/?a#b', parse_query=False, parse_fragment=False)
    <url scheme: 'http', host: 'host', path: '?a#b'>

    IPv6 addresses:

    >>> url('ldap://[2001:db8::7]/c=GB?objectClass?one')
    <url scheme: 'ldap', host: '[2001:db8::7]', path: 'c=GB',
         query: 'objectClass?one'>
    >>> url('ldap://joe:xxx@[2001:db8::7]:80/c=GB?objectClass?one')
    <url scheme: 'ldap', user: 'joe', passwd: 'xxx', host: '[2001:db8::7]',
         port: '80', path: 'c=GB', query: 'objectClass?one'>

    Missing scheme, host, etc.:

    >>> url('://192.0.2.16:80/')
    <url path: '://192.0.2.16:80/'>
    >>> url('http://mercurial.selenic.com')
    <url scheme: 'http', host: 'mercurial.selenic.com'>
    >>> url('/foo')
    <url path: '/foo'>
    >>> url('bundle:/foo')
    <url scheme: 'bundle', path: '/foo'>
    >>> url('a?b#c')
    <url path: 'a?b', fragment: 'c'>
    >>> url('http://x.com?arg=/foo')
    <url scheme: 'http', host: 'x.com', query: 'arg=/foo'>
    >>> url('http://joe:xxx@/foo')
    <url scheme: 'http', user: 'joe', passwd: 'xxx', path: 'foo'>

    Just a scheme and a path:

    >>> url('mailto:John.Doe@example.com')
    <url scheme: 'mailto', path: 'John.Doe@example.com'>
    >>> url('a:b:c:d')
    <url scheme: 'a', path: 'b:c:d'>

    SSH examples:

    >>> url('ssh://joe@host//home/joe')
    <url scheme: 'ssh', user: 'joe', host: 'host', path: '/home/joe'>
    >>> url('ssh://joe:xxx@host/src')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host', path: 'src'>
    >>> url('ssh://joe:xxx@host')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host'>
    >>> url('ssh://joe@host')
    <url scheme: 'ssh', user: 'joe', host: 'host'>
    >>> url('ssh://host')
    <url scheme: 'ssh', host: 'host'>
    >>> url('ssh://')
    <url scheme: 'ssh'>
    >>> url('ssh:')
    <url scheme: 'ssh'>

    Non-numeric port:

    >>> url('http://example.com:dd')
    <url scheme: 'http', host: 'example.com', port: 'dd'>
    >>> url('ssh://joe:xxx@host:ssh/foo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host', port: 'ssh',
         path: 'foo'>

    Bad authentication credentials:

    >>> url('http://joe@joeville:123@4:@host/a?b#c')
    <url scheme: 'http', user: 'joe@joeville', passwd: '123@4:',
         host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://!*#?/@!*#?/:@host/a?b#c')
    <url scheme: 'http', host: '!*', fragment: '?/@!*#?/:@host/a?b#c'>
    >>> url('http://!*#?@!*#?:@host/a?b#c')
    <url scheme: 'http', host: '!*', fragment: '?@!*#?:@host/a?b#c'>
    >>> url('http://!*@:!*@@host/a?b#c')
    <url scheme: 'http', user: '!*@', passwd: '!*@', host: 'host',
         path: 'a', query: 'b', fragment: 'c'>

    File paths:

    >>> url('a/b/c/d.g.f')
    <url path: 'a/b/c/d.g.f'>
    >>> url('/x///z/y/')
    <url path: '/x///z/y/'>

    Empty URL:

    >>> u = url('')
    >>> u
    <url path: ''>
    >>> str(u)
    ''

    Empty path with query string:

    >>> str(url('http://foo/?bar'))
    'http://foo/?bar'

    Invalid path (assigned without a leading slash):

    >>> u = url('http://foo/bar')
    >>> u.path = 'bar'
    >>> str(u)
    'http://foo/bar'

    A file URL with an empty host is rendered without the '//':

    >>> u = url('file:///foo/bar/baz')
    >>> u
    <url scheme: 'file', path: '/foo/bar/baz'>
    >>> str(u)
    'file:/foo/bar/baz'
    """
185
# Run the doctests of this module.  NORMALIZE_WHITESPACE lets the long
# expected reprs above be wrapped over several lines.
doctestflags = doctest.NORMALIZE_WHITESPACE
doctest.testmod(optionflags=doctestflags)

# Unicode (IDN) certname isn't supported
idn_cert = cert(u'\u4f8b.jp')
check(_verifycert(idn_cert, 'example.jp'),
      'IDN in certificate not supported')
General Comments 0
You need to be logged in to leave comments. Login now