##// END OF EJS Templates
url: add support for custom handlers in extensions
Henrik Stuart -
r9347:d0474b18 default
parent child Browse files
Show More
@@ -1,530 +1,533
1 1 # url.py - HTTP handling for mercurial
2 2 #
3 3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2, incorporated herein by reference.
9 9
10 10 import urllib, urllib2, urlparse, httplib, os, re, socket, cStringIO
11 11 from i18n import _
12 12 import keepalive, util
13 13
14 14 def hidepassword(url):
15 15 '''hide user credential in a url string'''
16 16 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
17 17 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc)
18 18 return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
19 19
20 20 def removeauth(url):
21 21 '''remove all authentication information from a url string'''
22 22 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
23 23 netloc = netloc[netloc.find('@')+1:]
24 24 return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
25 25
26 26 def netlocsplit(netloc):
27 27 '''split [user[:passwd]@]host[:port] into 4-tuple.'''
28 28
29 29 a = netloc.find('@')
30 30 if a == -1:
31 31 user, passwd = None, None
32 32 else:
33 33 userpass, netloc = netloc[:a], netloc[a+1:]
34 34 c = userpass.find(':')
35 35 if c == -1:
36 36 user, passwd = urllib.unquote(userpass), None
37 37 else:
38 38 user = urllib.unquote(userpass[:c])
39 39 passwd = urllib.unquote(userpass[c+1:])
40 40 c = netloc.find(':')
41 41 if c == -1:
42 42 host, port = netloc, None
43 43 else:
44 44 host, port = netloc[:c], netloc[c+1:]
45 45 return host, port, user, passwd
46 46
47 47 def netlocunsplit(host, port, user=None, passwd=None):
48 48 '''turn host, port, user, passwd into [user[:passwd]@]host[:port].'''
49 49 if port:
50 50 hostport = host + ':' + port
51 51 else:
52 52 hostport = host
53 53 if user:
54 54 if passwd:
55 55 userpass = urllib.quote(user) + ':' + urllib.quote(passwd)
56 56 else:
57 57 userpass = urllib.quote(user)
58 58 return userpass + '@' + hostport
59 59 return hostport
60 60
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
_safeset = None
_hex = None
def quotepath(path):
    '''quote the path part of a URL

    Works like urllib.quote, except that a "%" already followed by two
    hex digits is taken to be an existing escape and is not quoted a
    second time (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%20')
    'abc%20def%20'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'
    '''
    global _safeset, _hex
    if _safeset is None:
        # the lookup sets are built on first use and then kept
        _safeset = set(_safe)
        _hex = set('abcdefABCDEF0123456789')

    size = len(path)
    quoted = []
    for pos, char in enumerate(path):
        if char in _safeset:
            quoted.append(char)
        elif (char == '%' and pos + 2 < size and
              path[pos+1] in _hex and path[pos+2] in _hex):
            # looks like an escape sequence already: keep it
            quoted.append(char)
        else:
            quoted.append('%%%02X' % ord(char))
    return ''.join(quoted)
97 97
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    '''password manager that falls back on [auth] config and on prompting

    Credentials are looked up, in order, in what was registered with
    add_password(), in the "auth" configuration section, and finally by
    asking the user through ui.
    '''
    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        '''return a (user, passwd) pair for realm/authuri

        Missing pieces are taken from a matching [auth] entry and then
        prompted for. Raises util.Abort when something is still missing
        and ui is not interactive. The resulting pair is registered with
        add_password() before it is returned.
        '''
        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
            self, realm, authuri)
        user, passwd = authinfo
        if user and passwd:
            self._writedebug(user, passwd)
            return (user, passwd)

        if not user:
            auth = self.readauthtoken(authuri)
            if auth:
                user, passwd = auth.get('username'), auth.get('password')
        if not user or not passwd:
            if not self.ui.interactive():
                raise util.Abort(_('http authorization required'))

            self.ui.write(_("http authorization required\n"))
            self.ui.status(_("realm: %s\n") % realm)
            if user:
                self.ui.status(_("user: %s\n") % user)
            else:
                user = self.ui.prompt(_("user:"), default=None)

            if not passwd:
                passwd = self.ui.getpass()

        self.add_password(realm, authuri, user, passwd)
        self._writedebug(user, passwd)
        return (user, passwd)

    def _writedebug(self, user, passwd):
        '''emit a debug line naming user, with the password masked'''
        msg = _('http auth: user %s, password %s\n')
        self.ui.debug(msg % (user, passwd and '*' * len(passwd) or 'not set'))

    def readauthtoken(self, uri):
        '''return the [auth] group that best matches uri, or None

        Configuration keys have the form "group.setting". A group
        matches when its "prefix" is "*" or a prefix of uri (scheme
        removed) and the scheme of uri is acceptable: the scheme written
        in the prefix itself, else the "schemes" setting, else "https".
        The group with the longest prefix wins. The result is a dict of
        that group's settings.
        '''
        # without a scheme nothing can match; callers on Python < 2.4.3
        # may pass a bare netloc here
        if '://' not in uri:
            return None

        # Read configuration
        config = dict()
        for key, val in self.ui.configitems('auth'):
            if '.' not in key:
                # not of the form "group.setting": skip it instead of
                # failing the whole request on a tuple-unpacking error
                self.ui.debug(_("ignoring invalid [auth] key '%s'\n") % key)
                continue
            group, setting = key.split('.', 1)
            gdict = config.setdefault(group, dict())
            gdict[setting] = val

        # Find the best match
        scheme, hostpath = uri.split('://', 1)
        bestlen = 0
        bestauth = None
        for auth in config.itervalues():
            prefix = auth.get('prefix')
            if not prefix:
                continue
            p = prefix.split('://', 1)
            if len(p) > 1:
                schemes, prefix = [p[0]], p[1]
            else:
                schemes = (auth.get('schemes') or 'https').split()
            if (prefix == '*' or hostpath.startswith(prefix)) and \
                len(prefix) > bestlen and scheme in schemes:
                bestlen = len(prefix)
                bestauth = auth
        return bestauth
162 162
class proxyhandler(urllib2.ProxyHandler):
    '''proxy handler configured from the http_proxy settings

    The proxy comes from the "http_proxy.host" setting or, failing that,
    the http_proxy environment variable. Hosts on the bypass list are
    never sent through the proxy.
    '''
    def __init__(self, ui):
        proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
        # XXX proxyauthinfo = None

        proxies = {}
        if proxyurl:
            # the setting is either a proper url or a bare host[:port]
            isurl = (proxyurl.startswith('http:') or
                     proxyurl.startswith('https:'))
            if not isurl:
                proxyurl = 'http://' + proxyurl + '/'
            scheme, netloc, path, query, frag = urlparse.urlsplit(proxyurl)
            host, port, user, passwd = netlocsplit(netloc)
            if not user:
                # no credentials in the url itself: use the configured ones
                user = ui.config("http_proxy", "user")
                passwd = ui.config("http_proxy", "passwd")

            # hosts that are reached directly, never through the proxy
            bypass = ["localhost", "127.0.0.1"]
            for entry in ui.configlist("http_proxy", "no"):
                bypass.append(entry.lower())
            for entry in os.getenv("no_proxy", '').split(','):
                entry = entry.strip()
                if entry:
                    bypass.append(entry.lower())
            # "http_proxy.always" config is for running tests on localhost
            if ui.configbool("http_proxy", "always"):
                bypass = []
            self.no_list = bypass

            proxyurl = urlparse.urlunsplit((
                scheme, netlocunsplit(host, port, user, passwd or ''),
                path, query, frag))
            proxies = {'http': proxyurl, 'https': proxyurl}
            ui.debug(_('proxying through http://%s:%s\n') %
                     (host, port))

        # urllib2 takes proxy values from the environment and those
        # will take precedence if found, so drop them
        for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
            try:
                if env in os.environ:
                    del os.environ[env]
            except OSError:
                pass

        urllib2.ProxyHandler.__init__(self, proxies)
        self.ui = ui

    def proxy_open(self, req, proxy, type_):
        '''open req through proxy unless its host is on the bypass list

        Returns None for bypassed hosts.
        '''
        hostname = req.get_host().split(':', 1)[0]
        if hostname in self.no_list:
            return None

        # work around a bug in Python < 2.4.2
        # (it leaves a "\n" at the end of Proxy-authorization headers)
        baseclass = req.__class__
        class _request(baseclass):
            def add_header(self, key, val):
                if key.lower() == 'proxy-authorization':
                    val = val.strip()
                return baseclass.add_header(self, key, val)
        req.__class__ = _request

        return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
233 233
234 234 class httpsendfile(file):
235 235 def __len__(self):
236 236 return os.fstat(self.fileno()).st_size
237 237
def _gen_sendfile(connection):
    '''return a send() method that can stream httpsendfile objects

    connection is the class whose own send() does the actual sending.
    '''
    def _sendfile(self, data):
        if not isinstance(data, httpsendfile):
            connection.send(self, data)
            return
        # send a file
        # if auth required, some data sent twice, so rewind here
        data.seek(0)
        for chunk in util.filechunkiter(data):
            connection.send(self, chunk)
    return _sendfile
249 249
250 250 has_https = hasattr(urllib2, 'HTTPSHandler')
251 251 if has_https:
252 252 try:
253 253 # avoid using deprecated/broken FakeSocket in python 2.6
254 254 import ssl
255 255 _ssl_wrap_socket = ssl.wrap_socket
256 256 except ImportError:
257 257 def _ssl_wrap_socket(sock, key_file, cert_file):
258 258 ssl = socket.ssl(sock, key_file, cert_file)
259 259 return httplib.FakeSocket(sock, ssl)
260 260
class httpconnection(keepalive.HTTPConnection):
    '''HTTP connection that streams file bodies and can tunnel via CONNECT

    Before connect() is called the owner sets realhost, realport and
    headers on the instance; a non-empty realhost requests a CONNECT
    tunnel through the proxy at host/port.
    '''
    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection)

    def _proxytunnel(self):
        '''ask the proxy for a tunnel to realhost:realport

        Sends a CONNECT request together with the "proxy-*" headers found
        in self.headers. Returns True when the proxy answers 200.
        Otherwise returns False; except for an HTTP/0.9 reply, the parsed
        proxy response is first stored in self.proxyres so getresponse()
        can hand it out. Raises httplib.UnknownProtocol on an
        unrecognised HTTP version.
        '''
        proxyheaders = dict(
                [(x, self.headers[x]) for x in self.headers
                 if x.lower().startswith('proxy-')])
        self._set_hostport(self.host, self.port)
        self.send('CONNECT %s:%d HTTP/1.0\r\n' % (self.realhost, self.realport))
        for header in proxyheaders.iteritems():
            self.send('%s: %s\r\n' % header)
        self.send('\r\n')

        # majority of the following code is duplicated from
        # httplib.HTTPConnection as there are no adequate places to
        # override functions to provide the needed functionality
        res = self.response_class(self.sock,
                                  strict=self.strict,
                                  method=self._method)

        while True:
            version, status, reason = res._read_status()
            if status != httplib.CONTINUE:
                break
            # skip the headers of the 100 Continue response
            while True:
                skip = res.fp.readline().strip()
                if not skip:
                    break
        res.status = status
        res.reason = reason.strip()

        if res.status == 200:
            # tunnel established: skip the rest of the proxy's headers
            while True:
                line = res.fp.readline()
                # stop at the blank separator line, and also at EOF
                # (readline() then returns '' forever), so that a proxy
                # which closes early cannot make this loop spin
                if line in ('\r\n', '\n', ''):
                    break
            return True

        if version == 'HTTP/1.0':
            res.version = 10
        elif version.startswith('HTTP/1.'):
            res.version = 11
        elif version == 'HTTP/0.9':
            res.version = 9
        else:
            raise httplib.UnknownProtocol(version)

        if res.version == 9:
            res.length = None
            res.chunked = 0
            res.will_close = 1
            res.msg = httplib.HTTPMessage(cStringIO.StringIO())
            return False

        res.msg = httplib.HTTPMessage(res.fp)
        res.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        trenc = res.msg.getheader('transfer-encoding')
        if trenc and trenc.lower() == "chunked":
            res.chunked = 1
            res.chunk_left = None
        else:
            res.chunked = 0

        # will the connection close at the end of the response?
        res.will_close = res._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = res.msg.getheader('content-length')
        if length and not res.chunked:
            try:
                res.length = int(length)
            except ValueError:
                res.length = None
            else:
                if res.length < 0:  # ignore nonsensical negative lengths
                    res.length = None
        else:
            res.length = None

        # does the body have a fixed length? (of zero)
        if (status == httplib.NO_CONTENT or status == httplib.NOT_MODIFIED or
            100 <= status < 200 or # 1xx codes
            res._method == 'HEAD'):
            res.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not res.will_close and
            not res.chunked and
            res.length is None):
            res.will_close = 1

        self.proxyres = res

        return False

    def connect(self):
        '''connect directly, or through a CONNECT tunnel when realhost is set

        For a tunnel, a plain socket is opened to host/port and is wrapped
        in SSL only if the proxy accepted the CONNECT request.
        '''
        if has_https and self.realhost: # use CONNECT proxy
            self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.sock.connect((self.host, self.port))
            if self._proxytunnel():
                # we do not support client x509 certificates
                self.sock = _ssl_wrap_socket(self.sock, None, None)
        else:
            keepalive.HTTPConnection.connect(self)

    def getresponse(self):
        '''return the pending proxy response if any, else the real response

        A stored proxy response is handed out once and then cleared; the
        connection is closed first when that response says it will close.
        '''
        proxyres = getattr(self, 'proxyres', None)
        if proxyres:
            if proxyres.will_close:
                self.close()
            self.proxyres = None
            return proxyres
        return keepalive.HTTPConnection.getresponse(self)
380 380
class httphandler(keepalive.HTTPHandler):
    '''keepalive HTTP handler that opens httpconnection objects'''
    def http_open(self, req):
        return self.do_open(httpconnection, req)

    def _start_transaction(self, h, req):
        '''tell connection h whether to tunnel, then start the transaction

        When the request goes through a proxy and targets an https url,
        h.realhost and h.realport are set to the target (port 443 unless
        the url names one) and h.headers to the request headers merged
        with the opener's default headers. In every other case all three
        are set to None.
        '''
        target = None
        if req.get_selector() == req.get_full_url(): # has proxy
            scheme, netloc = urlparse.urlparse(req.get_selector())[:2]
            if scheme == 'https': # only use CONNECT for HTTPS
                target = netloc

        if target is None:
            h.realhost = None
            h.realport = None
            h.headers = None
        else:
            if ':' in target:
                host, port = target.split(':')
                port = int(port)
            else:
                host, port = target, 443
            h.realhost = host
            h.realport = port
            h.headers = req.headers.copy()
            h.headers.update(self.parent.addheaders)
        return keepalive.HTTPHandler._start_transaction(self, h, req)

    def __del__(self):
        self.close_all()
409 409
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        '''HTTPS connection with keepalive responses and file streaming'''
        response_class = keepalive.HTTPResponse
        # must be able to send big bundle as stream.
        send = _gen_sendfile(httplib.HTTPSConnection)

    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        '''keepalive HTTPS handler honouring key/cert from the [auth] config'''
        def __init__(self, ui):
            keepalive.KeepAliveHandler.__init__(self)
            urllib2.HTTPSHandler.__init__(self)
            self.ui = ui
            self.pwmgr = passwordmgr(self.ui)

        def https_open(self, req):
            # remember the matching [auth] group for _makeconnection
            self.auth = self.pwmgr.readauthtoken(req.get_full_url())
            return self.do_open(self._makeconnection, req)

        def _makeconnection(self, host, port=443, *args, **kwargs):
            '''return an httpsconnection to host

            The first two extra positional arguments, when given, are the
            key file and the certificate file; "key" and "cert" from the
            matching [auth] group override them when both are set. A port
            embedded in host overrides the port argument. All remaining
            arguments are passed on to httpsconnection.
            '''
            keyfile = None
            certfile = None

            # *args arrives as a tuple, which has no pop(): index it and
            # slice off what was consumed
            if len(args) >= 1: # key_file
                keyfile = args[0]
            if len(args) >= 2: # cert_file
                certfile = args[1]
            args = args[2:]

            # if the user has specified different key/cert files in
            # hgrc, we prefer these
            if self.auth and 'key' in self.auth and 'cert' in self.auth:
                keyfile = self.auth['key']
                certfile = self.auth['cert']

            # let host port take precedence
            if (':' in host and '[' not in host) or ']:' in host:
                host, port = host.rsplit(':', 1)
                port = int(port)
                if '[' in host:
                    # strip the brackets of a bracketed (IPv6) host
                    host = host[1:-1]

            return httpsconnection(host, port, keyfile, certfile, *args, **kwargs)
450 450
451 451 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
452 452 # it doesn't know about the auth type requested. This can happen if
453 453 # somebody is using BasicAuth and types a bad password.
454 454 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
455 455 def http_error_auth_reqed(self, auth_header, host, req, headers):
456 456 try:
457 457 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
458 458 self, auth_header, host, req, headers)
459 459 except ValueError, inst:
460 460 arg = inst.args[0]
461 461 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
462 462 return
463 463 raise
464 464
def getauthinfo(path):
    '''split a url into a credential-free url and its auth information

    Returns (url, authinfo). url is path without user and password, with
    an empty path replaced by "/" and, except for file urls, the path
    run through quotepath(). authinfo is None when path names no user,
    otherwise the tuple (None, (url, netloc), user, passwd), where a
    missing password becomes ''.
    '''
    scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
    urlpath = urlpath or '/'
    if scheme != 'file':
        # XXX: why are we quoting the path again with some smart
        # heuristic here? Anyway, it cannot be done with file://
        # urls since path encoding is os/fs dependent (see
        # urllib.pathname2url() for details).
        urlpath = quotepath(urlpath)
    host, port, user, passwd = netlocsplit(netloc)

    # urllib cannot handle URLs with embedded user or passwd
    cleannetloc = netlocunsplit(host, port)
    url = urlparse.urlunsplit((scheme, cleannetloc, urlpath, query, frag))
    if not user:
        return url, None

    netloc = host
    if port:
        netloc += ':' + port
    # Python < 2.4.3 uses only the netloc to search for a password
    return url, (None, (url, netloc), user, passwd or '')
489 489
# factories for extra urllib2 handlers; opener() calls each one with
# (ui, passmgr) and installs whatever it returns
handlerfuncs = []

def opener(ui, authinfo=None):
    '''
    construct an opener suitable for urllib2
    authinfo, when given, is registered with the password manager
    handlers built from handlerfuncs are installed after the stock ones
    '''
    handlers = [httphandler()]
    if has_https:
        handlers.append(httpshandler(ui))
    handlers.append(proxyhandler(ui))

    passmgr = passwordmgr(ui)
    if authinfo is not None:
        passmgr.add_password(*authinfo)
        user, passwd = authinfo[2:4]
        ui.debug(_('http auth: user %s, password %s\n') %
                 (user, passwd and '*' * len(passwd) or 'not set'))

    handlers.append(urllib2.HTTPBasicAuthHandler(passmgr))
    handlers.append(httpdigestauthhandler(passmgr))
    for factory in handlerfuncs:
        handlers.append(factory(ui, passmgr))
    urlopener = urllib2.build_opener(*handlers)

    # 1.0 here is the _protocol_ version
    urlopener.addheaders = [('User-agent', 'mercurial/proto-1.0'),
                            ('Accept', 'application/mercurial-0.1')]
    return urlopener
516 519
# A url scheme is made of letters, digits, '+', '-' and '.'. The '-' is
# placed last in the class so that it is a literal; written as '+-.' it
# formed the range '+'..'.', which also let ',' through.
scheme_re = re.compile(r'^([a-zA-Z0-9+.-]+)://')

def open(ui, url, data=None):
    '''open url, or a local path, and return the result of opener.open()

    A url that does not start with "scheme://" is treated as a local
    path and turned into an absolute file:// url. Otherwise any
    credentials embedded in the url are split off by getauthinfo() and
    handed to the opener. data is passed through to the opener's open().
    '''
    scheme = None
    m = scheme_re.search(url)
    if m:
        scheme = m.group(1).lower()
    if not scheme:
        path = util.normpath(os.path.abspath(url))
        url = 'file://' + urllib.pathname2url(path)
        authinfo = None
    else:
        url, authinfo = getauthinfo(url)
    return opener(ui, authinfo).open(url, data)
General Comments 0
You need to be logged in to leave comments. Login now