##// END OF EJS Templates
url: be stricter about detecting schemes...
Brodie Rao -
r13848:b2798c1d default
parent child Browse files
Show More
@@ -1,960 +1,961
1 1 # url.py - HTTP handling for mercurial
2 2 #
3 3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 import urllib, urllib2, httplib, os, socket, cStringIO
10 import urllib, urllib2, httplib, os, socket, cStringIO, re
11 11 import __builtin__
12 12 from i18n import _
13 13 import keepalive, util
14 14
class url(object):
    """Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url('c:\\\\foo\\\\bar')
    <url path: 'c:\\\\foo\\\\bar'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # characters left unquoted by __str__ in user/passwd, and in
    # path/query/fragment respectively
    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+"
    # a scheme is a non-empty run of letters, digits, '+', '.' or '-'
    # immediately followed by ':'
    _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        """Split path into the components described in the class docstring.

        Raises util.Abort for a file: URL whose host is not one of
        'localhost', '127.0.0.1' or '[::1]'.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        # special case for Windows drive letters
        if hasdriveletter(path):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parsefragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            if self._localpath:
                # no scheme: everything before the fragment is the path
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # with an empty authority the path keeps its
                    # leading slash (file:///x has a path of /x)
                    if path:
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise util.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # components are stored unquoted; __str__ quotes them again
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urllib.unquote(v))

    def __repr__(self):
        """Return '<url name: value, ...>' listing the components that are
        not None."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        """Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        """
        if self._localpath:
            # local paths and bundle: URLs are emitted without quoting
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            self.scheme and not self.path):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # bracketed (IPv6) hosts are emitted as-is
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (urlstring, authdata) for this URL.

        urlstring is str(self) computed with user and passwd temporarily
        cleared. authdata is None when there is no user; otherwise it is
        the tuple (None, (str(self), self.host), user, passwd or '').
        """
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            # always restore the credentials, even if str() fails
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        return (s, (None, (str(self), self.host),
                    self.user, self.passwd or ''))

    def localpath(self):
        """Return the local filesystem path this URL refers to.

        For file: and bundle: URLs the path is derived from the parsed
        components; for anything else the original string is returned.

        >>> url('file:///home/joe/repo').localpath()
        '/home/joe/repo'
        >>> url('file:///c:/foo').localpath()
        'c:/foo'
        >>> url('file:c:/foo').localpath()
        'c:/foo'
        >>> url('http://host/a').localpath()
        'http://host/a'
        """
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif self.host is not None and self.path:
                path = '/' + path
            # We also need to handle the case of file:///C:/, which
            # should return C:/, not /C:/. The drive letter has to be
            # looked for *after* the leading slash; testing the path
            # itself never matched '/C:/' and mangled 'C:/' into ':/'.
            elif path.startswith('/') and hasdriveletter(path[1:]):
                # Strip leading slash from paths with drive names
                return path[1:]
            return path
        return self._origpath
253 254
def hasscheme(path):
    '''tell whether parsing path as a url yields a non-empty scheme'''
    parsed = url(path)
    if parsed.scheme:
        return True
    return False
256 257
def hasdriveletter(path):
    '''tell whether path starts with an alphabetic character followed
    by a colon (e.g. "c:")'''
    if path[1:2] != ':':
        return False
    return path[0:1].isalpha()
259 260
def localpath(path):
    '''return url.localpath() for path, parsed with the query and
    fragment left inside the path component'''
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
262 263
def hidepassword(u):
    '''return the url string u with any password replaced by "***"'''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return str(parsed)
269 270
def removeauth(u):
    '''return the url string u with user and password stripped'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
275 276
def netlocsplit(netloc):
    '''split [user[:passwd]@]host[:port] into the 4-tuple
    (host, port, user, passwd)

    Absent parts are None. user and passwd are unquoted; host and
    port are returned as found.'''
    user = passwd = None
    if '@' in netloc:
        # credentials end at the first '@'
        credentials, netloc = netloc.split('@', 1)
        if ':' in credentials:
            rawuser, rawpasswd = credentials.split(':', 1)
            user = urllib.unquote(rawuser)
            passwd = urllib.unquote(rawpasswd)
        else:
            user = urllib.unquote(credentials)

    host, port = netloc, None
    if ':' in netloc:
        # the port starts after the first ':'
        host, port = netloc.split(':', 1)
    return host, port, user, passwd
296 297
def netlocunsplit(host, port, user=None, passwd=None):
    '''build [user[:passwd]@]host[:port] from its parts

    user and passwd are quoted with no safe characters; a falsy port,
    user or passwd is left out.'''
    hostport = host
    if port:
        hostport = host + ':' + port
    if not user:
        return hostport
    userpass = urllib.quote(user, safe='')
    if passwd:
        userpass = userpass + ':' + urllib.quote(passwd, safe='')
    return userpass + '@' + hostport
311 312
def readauthforuri(ui, uri):
    '''return the (group, settings) pair of the [auth] group whose
    prefix best matches uri, or None if no group matches

    [auth] keys have the form <group>.<setting>; keys without a dot
    are reported with a warning and skipped. A group matches when its
    prefix is '*' or uri's host/path part starts with it, and uri's
    scheme is among the group's schemes. The longest prefix wins.'''
    # Read configuration
    groups = {}
    for key, val in ui.configitems('auth'):
        if '.' not in key:
            ui.warn(_("ignoring invalid [auth] key '%s'\n") % key)
            continue
        group, setting = key.rsplit('.', 1)
        settings = groups.setdefault(group, {})
        if setting in ('username', 'cert', 'key'):
            val = util.expandpath(val)
        settings[setting] = val

    # Find the best match
    scheme, hostpath = uri.split('://', 1)
    best = None
    bestlen = 0
    for group, auth in groups.iteritems():
        prefix = auth.get('prefix')
        if not prefix:
            continue
        if '://' in prefix:
            # a scheme given in the prefix overrides the schemes setting
            prefixscheme, prefix = prefix.split('://', 1)
            schemes = [prefixscheme]
        else:
            schemes = (auth.get('schemes') or 'https').split()
        if len(prefix) <= bestlen or scheme not in schemes:
            continue
        if prefix == '*' or hostpath.startswith(prefix):
            bestlen = len(prefix)
            best = group, auth
    return best
343 344
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
# both sets are built on the first call to quotepath()
_safeset = None
_hex = None
def quotepath(path):
    '''quote the path part of a URL

    This is similar to urllib.quote, but it also tries to avoid
    quoting things twice (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%20')
    'abc%20def%20'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'
    '''
    global _safeset, _hex
    if _safeset is None:
        _safeset = set(_safe)
        _hex = set('abcdefABCDEF0123456789')
    total = len(path)
    pieces = []
    for pos, char in enumerate(path):
        # a '%' followed by two hex digits is an existing escape
        escaped = (char == '%' and pos + 2 < total and
                   path[pos + 1] in _hex and path[pos + 2] in _hex)
        if escaped or char in _safeset:
            pieces.append(char)
        else:
            pieces.append('%%%02X' % ord(char))
    return ''.join(pieces)
381 382
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    '''password manager that falls back to the [auth] configuration
    and then to prompting through ui'''

    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        '''return (user, passwd) for realm/authuri

        Credentials already known to the base class win. Otherwise
        readauthforuri() is consulted, and anything still missing is
        asked for interactively; util.Abort is raised when the ui is
        not interactive. The result is stored with add_password().'''
        base = urllib2.HTTPPasswordMgrWithDefaultRealm
        user, passwd = base.find_user_password(self, realm, authuri)
        if user and passwd:
            self._writedebug(user, passwd)
            return (user, passwd)

        if not user:
            match = readauthforuri(self.ui, authuri)
            if match:
                group, auth = match
                user = auth.get('username')
                passwd = auth.get('password')
                self.ui.debug("using auth.%s.* for authentication\n" % group)

        if not (user and passwd):
            if not self.ui.interactive():
                raise util.Abort(_('http authorization required'))

            self.ui.write(_("http authorization required\n"))
            self.ui.write(_("realm: %s\n") % realm)
            if not user:
                user = self.ui.prompt(_("user:"), default=None)
            else:
                self.ui.write(_("user: %s\n") % user)

            if not passwd:
                passwd = self.ui.getpass()

        self.add_password(realm, authuri, user, passwd)
        self._writedebug(user, passwd)
        return (user, passwd)

    def _writedebug(self, user, passwd):
        '''write the user name and a masked password to the debug
        output'''
        if passwd:
            shown = '*' * len(passwd)
        else:
            shown = 'not set'
        msg = _('http auth: user %s, password %s\n')
        self.ui.debug(msg % (user, shown))
422 423
class proxyhandler(urllib2.ProxyHandler):
    '''proxy handler configured from the [http_proxy] section and the
    http_proxy/no_proxy environment variables'''

    def __init__(self, ui):
        proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
        # XXX proxyauthinfo = None

        proxies = {}
        if proxyurl:
            # proxy can be proper url or host[:port]
            if (not proxyurl.startswith('http:') and
                not proxyurl.startswith('https:')):
                proxyurl = 'http://' + proxyurl + '/'
            proxy = url(proxyurl)
            if not proxy.user:
                proxy.user = ui.config("http_proxy", "user")
                proxy.passwd = ui.config("http_proxy", "passwd")

            # see if we should use a proxy for this url
            bypass = ["localhost", "127.0.0.1"]
            for entry in ui.configlist("http_proxy", "no"):
                bypass.append(entry.lower())
            for entry in os.getenv("no_proxy", '').split(','):
                entry = entry.strip()
                if entry:
                    bypass.append(entry.lower())
            # "http_proxy.always" config is for running tests on localhost
            if ui.configbool("http_proxy", "always"):
                bypass = []
            self.no_list = bypass

            proxyurl = str(proxy)
            proxies = {'http': proxyurl, 'https': proxyurl}
            ui.debug('proxying through http://%s:%s\n' %
                      (proxy.host, proxy.port))

        # urllib2 takes proxy values from the environment and those
        # will take precedence if found, so drop them
        for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
            try:
                if env in os.environ:
                    del os.environ[env]
            except OSError:
                pass

        urllib2.ProxyHandler.__init__(self, proxies)
        self.ui = ui

    def proxy_open(self, req, proxy, type_):
        '''return None for hosts listed in no_list, otherwise defer to
        urllib2.ProxyHandler.proxy_open'''
        hostname = req.get_host().split(':')[0]
        if hostname in self.no_list:
            return None

        # work around a bug in Python < 2.4.2
        # (it leaves a "\n" at the end of Proxy-authorization headers)
        origclass = req.__class__
        class _request(origclass):
            def add_header(self, key, val):
                if key.lower() == 'proxy-authorization':
                    val = val.strip()
                return origclass.add_header(self, key, val)
        req.__class__ = _request

        return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
486 487
class httpsendfile(object):
    """This is a wrapper around the objects returned by python's "open".

    Its purpose is to send file-like objects via HTTP and, to do so, it
    defines a __len__ attribute to feed the Content-Length header.

    read() also reports how much has been read through ui.progress().
    """

    def __init__(self, ui, *args, **kwargs):
        """Open the file described by *args/**kwargs (passed through to
        the builtin open) and record its size."""
        # We can't just "self._data = open(*args, **kwargs)" here because there
        # is an "open" function defined in this module that shadows the global
        # one
        self.ui = ui
        self._data = __builtin__.open(*args, **kwargs)
        self.seek = self._data.seek
        self.close = self._data.close
        self.write = self._data.write
        self._len = os.fstat(self._data.fileno()).st_size
        # _pos counts every byte read and is not rewound by seek()
        self._pos = 0
        self._total = len(self) / 1024 * 2

    def read(self, *args, **kwargs):
        """Read from the wrapped file and update the 'sending' progress.

        An empty read closes the progress topic and is returned as-is.
        """
        try:
            ret = self._data.read(*args, **kwargs)
        except EOFError:
            # previously this left "ret" unbound and the next statement
            # failed; treat it like the empty read a file returns at EOF
            ret = ''
        if not ret:
            # file objects signal end of file with an empty read rather
            # than EOFError, so this is where the progress topic ends
            self.ui.progress(_('sending'), None)
            return ret
        self._pos += len(ret)
        # We pass double the max for total because we currently have
        # to send the bundle twice in the case of a server that
        # requires authentication. Since we can't know until we try
        # once whether authentication will be required, just lie to
        # the user and maybe the push succeeds suddenly at 50%.
        self.ui.progress(_('sending'), self._pos / 1024,
                         unit=_('kb'), total=self._total)
        return ret

    def __len__(self):
        """Return the size of the file as measured when it was opened."""
        return self._len
524 525
def _gen_sendfile(orgsend):
    '''wrap the connection send method orgsend so that httpsendfile
    objects are rewound and sent chunk by chunk'''
    def _sendfile(self, data):
        if not isinstance(data, httpsendfile):
            orgsend(self, data)
            return
        # send a file
        # if auth required, some data sent twice, so rewind here
        data.seek(0)
        for piece in util.filechunkiter(data):
            orgsend(self, piece)
    return _sendfile
536 537
# True when urllib2 provides an HTTPSHandler
has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
    try:
        # avoid using deprecated/broken FakeSocket in python 2.6
        import ssl
        _ssl_wrap_socket = ssl.wrap_socket
        CERT_REQUIRED = ssl.CERT_REQUIRED
    except ImportError:
        # No ssl module: define stand-ins with the same names.
        # NOTE(review): 2 presumably mirrors ssl.CERT_REQUIRED -- confirm
        CERT_REQUIRED = 2

        def _ssl_wrap_socket(sock, key_file, cert_file,
                             cert_reqs=CERT_REQUIRED, ca_certs=None):
            '''wrap sock using socket.ssl and httplib.FakeSocket

            cert_reqs is accepted but not used here. Raises util.Abort
            when ca_certs is given.'''
            if ca_certs:
                raise util.Abort(_(
                    'certificate checking requires Python 2.6'))

            ssl = socket.ssl(sock, key_file, cert_file)
            return httplib.FakeSocket(sock, ssl)
555 556
# Use socket.create_connection when the socket module has it; otherwise
# define a replacement below.
try:
    _create_connection = socket.create_connection
except AttributeError:
    # sentinel meaning "no timeout argument was given"
    _GLOBAL_DEFAULT_TIMEOUT = object()

    def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT,
                           source_address=None):
        '''connect to address, a (host, port) pair, and return the socket

        Each result of socket.getaddrinfo() is tried in turn and the
        first socket that connects is returned. If none connects,
        socket.error is raised with the last error seen (or a fixed
        message when getaddrinfo() returned nothing).'''
        # lifted from Python 2.6

        msg = "getaddrinfo returns an empty list"
        host, port = address
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
                if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
                    sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
                sock.connect(sa)
                return sock

            except socket.error, msg:
                # remember the error in msg and try the next address
                if sock is not None:
                    sock.close()

        raise socket.error, msg
584 585
class httpconnection(keepalive.HTTPConnection):
    '''keepalive HTTP connection that can stream httpsendfile objects
    and tunnel through a CONNECT proxy'''

    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection.send)

    def connect(self):
        '''connect normally, or set up a CONNECT tunnel when https is
        available and realhostport is set'''
        if not (has_https and self.realhostport):
            keepalive.HTTPConnection.connect(self)
            return
        # use CONNECT proxy
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.connect((self.host, self.port))
        if _generic_proxytunnel(self):
            # we do not support client x509 certificates
            self.sock = _ssl_wrap_socket(self.sock, None, None)

    def getresponse(self):
        '''return the response stored in proxyres, if any, instead of
        reading a new one'''
        pending = getattr(self, 'proxyres', None)
        if not pending:
            return keepalive.HTTPConnection.getresponse(self)
        if pending.will_close:
            self.close()
        self.proxyres = None
        return pending
607 608
# general transaction handler to support different ways to handle
# HTTPS proxying before and after Python 2.6.3.
def _generic_start_transaction(handler, h, req):
    '''set h.realhostport and h.headers for the request req

    When req must go through a CONNECT tunnel, realhostport becomes
    "host:port" of the real destination (port defaulting to 443) and
    headers a copy of the request headers updated with the opener's
    addheaders. Otherwise both are set to None.'''
    new_tunnel = bool(hasattr(req, '_tunnel_host') and req._tunnel_host)
    if new_tunnel:
        tunnel_host = req._tunnel_host
        if tunnel_host[:7] not in ['http://', 'https:/']:
            tunnel_host = 'https://' + tunnel_host
    else:
        tunnel_host = req.get_selector()

    target = None
    if new_tunnel or tunnel_host == req.get_full_url(): # has proxy
        parsed = url(tunnel_host)
        if new_tunnel or parsed.scheme == 'https': # only use CONNECT for HTTPS
            target = parsed

    if target is None:
        h.realhostport = None
        h.headers = None
        return

    h.realhostport = ':'.join([target.host, (target.port or '443')])
    h.headers = req.headers.copy()
    h.headers.update(handler.parent.addheaders)
630 631
631 632 def _generic_proxytunnel(self):
632 633 proxyheaders = dict(
633 634 [(x, self.headers[x]) for x in self.headers
634 635 if x.lower().startswith('proxy-')])
635 636 self._set_hostport(self.host, self.port)
636 637 self.send('CONNECT %s HTTP/1.0\r\n' % self.realhostport)
637 638 for header in proxyheaders.iteritems():
638 639 self.send('%s: %s\r\n' % header)
639 640 self.send('\r\n')
640 641
641 642 # majority of the following code is duplicated from
642 643 # httplib.HTTPConnection as there are no adequate places to
643 644 # override functions to provide the needed functionality
644 645 res = self.response_class(self.sock,
645 646 strict=self.strict,
646 647 method=self._method)
647 648
648 649 while True:
649 650 version, status, reason = res._read_status()
650 651 if status != httplib.CONTINUE:
651 652 break
652 653 while True:
653 654 skip = res.fp.readline().strip()
654 655 if not skip:
655 656 break
656 657 res.status = status
657 658 res.reason = reason.strip()
658 659
659 660 if res.status == 200:
660 661 while True:
661 662 line = res.fp.readline()
662 663 if line == '\r\n':
663 664 break
664 665 return True
665 666
666 667 if version == 'HTTP/1.0':
667 668 res.version = 10
668 669 elif version.startswith('HTTP/1.'):
669 670 res.version = 11
670 671 elif version == 'HTTP/0.9':
671 672 res.version = 9
672 673 else:
673 674 raise httplib.UnknownProtocol(version)
674 675
675 676 if res.version == 9:
676 677 res.length = None
677 678 res.chunked = 0
678 679 res.will_close = 1
679 680 res.msg = httplib.HTTPMessage(cStringIO.StringIO())
680 681 return False
681 682
682 683 res.msg = httplib.HTTPMessage(res.fp)
683 684 res.msg.fp = None
684 685
685 686 # are we using the chunked-style of transfer encoding?
686 687 trenc = res.msg.getheader('transfer-encoding')
687 688 if trenc and trenc.lower() == "chunked":
688 689 res.chunked = 1
689 690 res.chunk_left = None
690 691 else:
691 692 res.chunked = 0
692 693
693 694 # will the connection close at the end of the response?
694 695 res.will_close = res._check_close()
695 696
696 697 # do we have a Content-Length?
697 698 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
698 699 length = res.msg.getheader('content-length')
699 700 if length and not res.chunked:
700 701 try:
701 702 res.length = int(length)
702 703 except ValueError:
703 704 res.length = None
704 705 else:
705 706 if res.length < 0: # ignore nonsensical negative lengths
706 707 res.length = None
707 708 else:
708 709 res.length = None
709 710
710 711 # does the body have a fixed length? (of zero)
711 712 if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
712 713 100 <= status < 200 or # 1xx codes
713 714 res._method == 'HEAD'):
714 715 res.length = 0
715 716
716 717 # if the connection remains open, and we aren't using chunked, and
717 718 # a content-length was not provided, then assume that the connection
718 719 # WILL close.
719 720 if (not res.will_close and
720 721 not res.chunked and
721 722 res.length is None):
722 723 res.will_close = 1
723 724
724 725 self.proxyres = res
725 726
726 727 return False
727 728
class httphandler(keepalive.HTTPHandler):
    '''keepalive HTTP handler that opens requests with httpconnection'''

    def http_open(self, req):
        response = self.do_open(httpconnection, req)
        return response

    def _start_transaction(self, h, req):
        # record the tunnel settings on h before the transaction starts
        _generic_start_transaction(self, h, req)
        base = keepalive.HTTPHandler
        return base._start_transaction(self, h, req)
735 736
736 737 def _verifycert(cert, hostname):
737 738 '''Verify that cert (in socket.getpeercert() format) matches hostname.
738 739 CRLs is not handled.
739 740
740 741 Returns error message if any problems are found and None on success.
741 742 '''
742 743 if not cert:
743 744 return _('no certificate received')
744 745 dnsname = hostname.lower()
745 746 def matchdnsname(certname):
746 747 return (certname == dnsname or
747 748 '.' in dnsname and certname == '*.' + dnsname.split('.', 1)[1])
748 749
749 750 san = cert.get('subjectAltName', [])
750 751 if san:
751 752 certnames = [value.lower() for key, value in san if key == 'DNS']
752 753 for name in certnames:
753 754 if matchdnsname(name):
754 755 return None
755 756 return _('certificate is for %s') % ', '.join(certnames)
756 757
757 758 # subject is only checked when subjectAltName is empty
758 759 for s in cert.get('subject', []):
759 760 key, value = s[0]
760 761 if key == 'commonName':
761 762 try:
762 763 # 'subject' entries are unicode
763 764 certname = value.lower().encode('ascii')
764 765 except UnicodeEncodeError:
765 766 return _('IDN in certificate not supported')
766 767 if matchdnsname(certname):
767 768 return None
768 769 return _('certificate is for %s') % certname
769 770 return _('no commonName or subjectAltName found in certificate')
770 771
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        '''HTTPS connection that checks the server certificate against
        web.cacerts or a configured [hostfingerprints] entry

        The ui attribute is read by connect(); httpshandler sets it
        right after creating the connection.'''
        response_class = keepalive.HTTPResponse
        # must be able to send big bundle as stream.
        send = _gen_sendfile(keepalive.safesend)
        getresponse = keepalive.wrapgetresponse(httplib.HTTPSConnection)

        def connect(self):
            '''connect, wrap the socket in SSL and check the certificate

            Raises util.Abort when web.cacerts cannot be found, when
            the certificate does not match the host or the configured
            fingerprint, or when a fingerprint is configured but the
            socket cannot provide a certificate.'''
            self.sock = _create_connection((self.host, self.port))

            host = self.host
            if self.realhostport: # use CONNECT proxy
                # NOTE(review): the tunnel result is stored but never
                # checked, so a refused CONNECT still proceeds to the
                # SSL handshake -- confirm whether that is intended
                something = _generic_proxytunnel(self)
                # verify against the real destination, not the proxy
                host = self.realhostport.rsplit(':', 1)[0]

            cacerts = self.ui.config('web', 'cacerts')
            hostfingerprint = self.ui.config('hostfingerprints', host)

            # a configured fingerprint takes precedence over web.cacerts
            if cacerts and not hostfingerprint:
                cacerts = util.expandpath(cacerts)
                if not os.path.exists(cacerts):
                    raise util.Abort(_('could not find '
                                       'web.cacerts: %s') % cacerts)
                self.sock = _ssl_wrap_socket(self.sock, self.key_file,
                    self.cert_file, cert_reqs=CERT_REQUIRED,
                    ca_certs=cacerts)
                msg = _verifycert(self.sock.getpeercert(), host)
                if msg:
                    raise util.Abort(_('%s certificate error: %s '
                                       '(use --insecure to connect '
                                       'insecurely)') % (host, msg))
                self.ui.debug('%s certificate successfully verified\n' % host)
            else:
                self.sock = _ssl_wrap_socket(self.sock, self.key_file,
                    self.cert_file)
                if hasattr(self.sock, 'getpeercert'):
                    # fingerprint: SHA-1 hex digest of the certificate
                    # in binary form, shown as colon-separated pairs
                    peercert = self.sock.getpeercert(True)
                    peerfingerprint = util.sha1(peercert).hexdigest()
                    nicefingerprint = ":".join([peerfingerprint[x:x + 2]
                        for x in xrange(0, len(peerfingerprint), 2)])
                    if hostfingerprint:
                        # compare ignoring case and ':' separators
                        if peerfingerprint.lower() != \
                                hostfingerprint.replace(':', '').lower():
                            raise util.Abort(_('invalid certificate for %s '
                                               'with fingerprint %s') %
                                             (host, nicefingerprint))
                        self.ui.debug('%s certificate matched fingerprint %s\n' %
                                      (host, nicefingerprint))
                    else:
                        # nothing to check against: warn and carry on
                        self.ui.warn(_('warning: %s certificate '
                                       'with fingerprint %s not verified '
                                       '(check hostfingerprints or web.cacerts '
                                       'config setting)\n') %
                                     (host, nicefingerprint))
                else: # python 2.5 ?
                    if hostfingerprint:
                        raise util.Abort(_('no certificate for %s with '
                                           'configured hostfingerprint') % host)
                    self.ui.warn(_('warning: %s certificate not verified '
                                   '(check web.cacerts config setting)\n') %
                                 host)

    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        '''keepalive HTTPS handler that creates httpsconnection objects
        and applies key/cert settings from the [auth] configuration'''

        def __init__(self, ui):
            keepalive.KeepAliveHandler.__init__(self)
            urllib2.HTTPSHandler.__init__(self)
            self.ui = ui
            self.pwmgr = passwordmgr(self.ui)

        def _start_transaction(self, h, req):
            # record the tunnel settings on h before the transaction starts
            _generic_start_transaction(self, h, req)
            return keepalive.KeepAliveHandler._start_transaction(self, h, req)

        def https_open(self, req):
            '''open req, remembering the matching [auth] group (if any)
            in self.auth for _makeconnection()'''
            res = readauthforuri(self.ui, req.get_full_url())
            if res:
                group, auth = res
                self.auth = auth
                self.ui.debug("using auth.%s.* for authentication\n" % group)
            else:
                self.auth = None
            return self.do_open(self._makeconnection, req)

        def _makeconnection(self, host, port=None, *args, **kwargs):
            '''create an httpsconnection for host/port

            The first two extra positional arguments, when present, are
            taken as key file and cert file.'''
            keyfile = None
            certfile = None

            if len(args) >= 1: # key_file
                keyfile = args[0]
            if len(args) >= 2: # cert_file
                certfile = args[1]
            args = args[2:]

            # if the user has specified different key/cert files in
            # hgrc, we prefer these
            if self.auth and 'key' in self.auth and 'cert' in self.auth:
                keyfile = self.auth['key']
                certfile = self.auth['cert']

            conn = httpsconnection(host, port, keyfile, certfile, *args, **kwargs)
            # connect() reads its configuration through conn.ui
            conn.ui = self.ui
            return conn
873 874
874 875 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
875 876 def __init__(self, *args, **kwargs):
876 877 urllib2.HTTPDigestAuthHandler.__init__(self, *args, **kwargs)
877 878 self.retried_req = None
878 879
879 880 def reset_retry_count(self):
880 881 # Python 2.6.5 will call this on 401 or 407 errors and thus loop
881 882 # forever. We disable reset_retry_count completely and reset in
882 883 # http_error_auth_reqed instead.
883 884 pass
884 885
885 886 def http_error_auth_reqed(self, auth_header, host, req, headers):
886 887 # Reset the retry counter once for each request.
887 888 if req is not self.retried_req:
888 889 self.retried_req = req
889 890 self.retried = 0
890 891 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
891 892 # it doesn't know about the auth type requested. This can happen if
892 893 # somebody is using BasicAuth and types a bad password.
893 894 try:
894 895 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
895 896 self, auth_header, host, req, headers)
896 897 except ValueError, inst:
897 898 arg = inst.args[0]
898 899 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
899 900 return
900 901 raise
901 902
class httpbasicauthhandler(urllib2.HTTPBasicAuthHandler):
    '''basic auth handler that resets its retry counter once per
    request'''

    def __init__(self, *args, **kwargs):
        urllib2.HTTPBasicAuthHandler.__init__(self, *args, **kwargs)
        self.retried_req = None

    def reset_retry_count(self):
        # Python 2.6.5 will call this on 401 or 407 errors and thus loop
        # forever. We disable reset_retry_count completely and reset in
        # http_error_auth_reqed instead.
        pass

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        # Reset the retry counter once for each request.
        if self.retried_req is not req:
            self.retried_req = req
            self.retried = 0
        base = urllib2.HTTPBasicAuthHandler
        return base.http_error_auth_reqed(
                        self, auth_header, host, req, headers)
920 921
# callables invoked as h(ui, passmgr); each result is added to the
# handlers of every opener
handlerfuncs = []

def opener(ui, authinfo=None):
    '''
    build a urllib2 opener from this module's handlers
    authinfo, when given, is passed to the password manager's
    add_password()
    '''
    chain = [httphandler()]
    if has_https:
        chain.append(httpshandler(ui))

    chain.append(proxyhandler(ui))

    passmgr = passwordmgr(ui)
    if authinfo is not None:
        passmgr.add_password(*authinfo)
        user, passwd = authinfo[2:4]
        if passwd:
            masked = '*' * len(passwd)
        else:
            masked = 'not set'
        ui.debug('http auth: user %s, password %s\n' % (user, masked))

    chain.append(httpbasicauthhandler(passmgr))
    chain.append(httpdigestauthhandler(passmgr))
    for factory in handlerfuncs:
        chain.append(factory(ui, passmgr))
    director = urllib2.build_opener(*chain)

    # 1.0 here is the _protocol_ version
    director.addheaders = [('User-agent', 'mercurial/proto-1.0'),
                           ('Accept', 'application/mercurial-0.1')]
    return director
950 951
def open(ui, url_, data=None):
    '''open url_ through opener() and return the result

    A url_ without a scheme is treated as a local path and turned
    into a file:// URL.'''
    parsed = url(url_)
    if not parsed.scheme:
        abspath = util.normpath(os.path.abspath(url_))
        target = 'file://' + urllib.pathname2url(abspath)
        authinfo = None
    else:
        parsed.scheme = parsed.scheme.lower()
        target, authinfo = parsed.authinfo()
    return opener(ui, authinfo).open(target, data)
@@ -1,199 +1,205
1 1 import sys
2 2
def check(a, b):
    # report a mismatch by printing the (a, b) pair; matching values
    # produce no output
    differs = a != b
    if differs:
        print((a, b))
6 6
def cert(cn):
    # build a certificate dict whose subject holds just the given
    # commonName
    subject = ((('commonName', cn),),)
    return {'subject': subject}
9 9
from mercurial.url import _verifycert

san_cert = {'subject': ((('commonName', 'example.com'),),),
            'subjectAltName': (('DNS', '*.example.net'),
                               ('DNS', 'example.net'))}

# each entry is (certificate, hostname, expected _verifycert() result)
verifycases = [
    # Test non-wildcard certificates
    (cert('example.com'), 'example.com', None),
    (cert('example.com'), 'www.example.com',
     'certificate is for example.com'),
    (cert('www.example.com'), 'example.com',
     'certificate is for www.example.com'),

    # Test wildcard certificates
    (cert('*.example.com'), 'www.example.com', None),
    (cert('*.example.com'), 'example.com',
     'certificate is for *.example.com'),
    (cert('*.example.com'), 'w.w.example.com',
     'certificate is for *.example.com'),

    # Test subjectAltName
    (san_cert, 'example.net', None),
    (san_cert, 'foo.example.net', None),
    # subject is only checked when subjectAltName is empty
    (san_cert, 'example.com',
     'certificate is for *.example.net, example.net'),

    # Avoid some pitfalls
    (cert('*.foo'), 'foo', 'certificate is for *.foo'),
    (cert('*o'), 'foo', 'certificate is for *o'),

    ({'subject': ()}, 'example.com',
     'no commonName or subjectAltName found in certificate'),
    (None, 'example.com', 'no certificate received'),
]

for certificate, hostname, expected in verifycases:
    check(_verifycert(certificate, hostname), expected)
51 51
52 52 import doctest
53 53
def test_url():
    """
    >>> from mercurial.url import url

    This tests for edge cases in url.url's parsing algorithm. Most of
    these aren't useful for documentation purposes, so they aren't
    part of the class's doc tests.

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?')
    <url scheme: 'http', host: 'host', path: 'a'>
    >>> url('http://host/a#b#c')
    <url scheme: 'http', host: 'host', path: 'a', fragment: 'b#c'>
    >>> url('http://host/a#b?c')
    <url scheme: 'http', host: 'host', path: 'a', fragment: 'b?c'>
    >>> url('http://host/?a#b')
    <url scheme: 'http', host: 'host', path: '', query: 'a', fragment: 'b'>
    >>> url('http://host/?a#b', parsequery=False)
    <url scheme: 'http', host: 'host', path: '?a', fragment: 'b'>
    >>> url('http://host/?a#b', parsefragment=False)
    <url scheme: 'http', host: 'host', path: '', query: 'a#b'>
    >>> url('http://host/?a#b', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: '?a#b'>

    IPv6 addresses:

    >>> url('ldap://[2001:db8::7]/c=GB?objectClass?one')
    <url scheme: 'ldap', host: '[2001:db8::7]', path: 'c=GB',
         query: 'objectClass?one'>
    >>> url('ldap://joe:xxx@[2001:db8::7]:80/c=GB?objectClass?one')
    <url scheme: 'ldap', user: 'joe', passwd: 'xxx', host: '[2001:db8::7]',
         port: '80', path: 'c=GB', query: 'objectClass?one'>

    Missing scheme, host, etc.:

    >>> url('://192.0.2.16:80/')
    <url path: '://192.0.2.16:80/'>
    >>> url('http://mercurial.selenic.com')
    <url scheme: 'http', host: 'mercurial.selenic.com'>
    >>> url('/foo')
    <url path: '/foo'>
    >>> url('bundle:/foo')
    <url scheme: 'bundle', path: '/foo'>
    >>> url('a?b#c')
    <url path: 'a?b', fragment: 'c'>
    >>> url('http://x.com?arg=/foo')
    <url scheme: 'http', host: 'x.com', query: 'arg=/foo'>
    >>> url('http://joe:xxx@/foo')
    <url scheme: 'http', user: 'joe', passwd: 'xxx', path: 'foo'>

    Just a scheme and a path:

    >>> url('mailto:John.Doe@example.com')
    <url scheme: 'mailto', path: 'John.Doe@example.com'>
    >>> url('a:b:c:d')
    <url path: 'a:b:c:d'>
    >>> url('aa:bb:cc:dd')
    <url scheme: 'aa', path: 'bb:cc:dd'>

    SSH examples:

    >>> url('ssh://joe@host//home/joe')
    <url scheme: 'ssh', user: 'joe', host: 'host', path: '/home/joe'>
    >>> url('ssh://joe:xxx@host/src')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host', path: 'src'>
    >>> url('ssh://joe:xxx@host')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host'>
    >>> url('ssh://joe@host')
    <url scheme: 'ssh', user: 'joe', host: 'host'>
    >>> url('ssh://host')
    <url scheme: 'ssh', host: 'host'>
    >>> url('ssh://')
    <url scheme: 'ssh'>
    >>> url('ssh:')
    <url scheme: 'ssh'>

    Non-numeric port:

    >>> url('http://example.com:dd')
    <url scheme: 'http', host: 'example.com', port: 'dd'>
    >>> url('ssh://joe:xxx@host:ssh/foo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host', port: 'ssh',
         path: 'foo'>

    Bad authentication credentials:

    >>> url('http://joe@joeville:123@4:@host/a?b#c')
    <url scheme: 'http', user: 'joe@joeville', passwd: '123@4:',
         host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://!*#?/@!*#?/:@host/a?b#c')
    <url scheme: 'http', host: '!*', fragment: '?/@!*#?/:@host/a?b#c'>
    >>> url('http://!*#?@!*#?:@host/a?b#c')
    <url scheme: 'http', host: '!*', fragment: '?@!*#?:@host/a?b#c'>
    >>> url('http://!*@:!*@@host/a?b#c')
    <url scheme: 'http', user: '!*@', passwd: '!*@', host: 'host',
         path: 'a', query: 'b', fragment: 'c'>

    File paths:

    >>> url('a/b/c/d.g.f')
    <url path: 'a/b/c/d.g.f'>
    >>> url('/x///z/y/')
    <url path: '/x///z/y/'>
    >>> url('/foo:bar')
    <url path: '/foo:bar'>
    >>> url('\\\\foo:bar')
    <url path: '\\\\foo:bar'>
    >>> url('./foo:bar')
    <url path: './foo:bar'>

    Non-localhost file URL:

    >>> u = url('file://mercurial.selenic.com/foo')
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    Abort: file:// URLs can only refer to localhost

    Empty URL:

    >>> u = url('')
    >>> u
    <url path: ''>
    >>> str(u)
    ''

    Empty path with query string:

    >>> str(url('http://foo/?bar'))
    'http://foo/?bar'

    Invalid path:

    >>> u = url('http://foo/bar')
    >>> u.path = 'bar'
    >>> str(u)
    'http://foo/bar'

    >>> u = url('file:///foo/bar/baz')
    >>> u
    <url scheme: 'file', path: '/foo/bar/baz'>
    >>> str(u)
    'file:/foo/bar/baz'
    """
194 200
# run this module's doctests; NORMALIZE_WHITESPACE lets expected output
# be wrapped across lines
doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)

# Unicode (IDN) certname isn't supported
idncert = cert(u'\u4f8b.jp')
idnresult = _verifycert(idncert, 'example.jp')
check(idnresult, 'IDN in certificate not supported')
General Comments 0
You need to be logged in to leave comments. Login now