##// END OF EJS Templates
url: detect scheme with a regexp instead of urlsplit()...
Patrick Mezard -
r7285:5ad99abf default
parent child Browse files
Show More
@@ -1,309 +1,314 b''
1 # url.py - HTTP handling for mercurial
1 # url.py - HTTP handling for mercurial
2 #
2 #
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms
7 # This software may be used and distributed according to the terms
8 # of the GNU General Public License, incorporated herein by reference.
8 # of the GNU General Public License, incorporated herein by reference.
9
9
10 import urllib, urllib2, urlparse, httplib, os, re
10 import urllib, urllib2, urlparse, httplib, os, re
11 from i18n import _
11 from i18n import _
12 import keepalive, util
12 import keepalive, util
13
13
14 def hidepassword(url):
14 def hidepassword(url):
15 '''hide user credential in a url string'''
15 '''hide user credential in a url string'''
16 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
16 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
17 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc)
17 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc)
18 return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
18 return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
19
19
20 def removeauth(url):
20 def removeauth(url):
21 '''remove all authentication information from a url string'''
21 '''remove all authentication information from a url string'''
22 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
22 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
23 netloc = netloc[netloc.find('@')+1:]
23 netloc = netloc[netloc.find('@')+1:]
24 return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
24 return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
25
25
26 def netlocsplit(netloc):
26 def netlocsplit(netloc):
27 '''split [user[:passwd]@]host[:port] into 4-tuple.'''
27 '''split [user[:passwd]@]host[:port] into 4-tuple.'''
28
28
29 a = netloc.find('@')
29 a = netloc.find('@')
30 if a == -1:
30 if a == -1:
31 user, passwd = None, None
31 user, passwd = None, None
32 else:
32 else:
33 userpass, netloc = netloc[:a], netloc[a+1:]
33 userpass, netloc = netloc[:a], netloc[a+1:]
34 c = userpass.find(':')
34 c = userpass.find(':')
35 if c == -1:
35 if c == -1:
36 user, passwd = urllib.unquote(userpass), None
36 user, passwd = urllib.unquote(userpass), None
37 else:
37 else:
38 user = urllib.unquote(userpass[:c])
38 user = urllib.unquote(userpass[:c])
39 passwd = urllib.unquote(userpass[c+1:])
39 passwd = urllib.unquote(userpass[c+1:])
40 c = netloc.find(':')
40 c = netloc.find(':')
41 if c == -1:
41 if c == -1:
42 host, port = netloc, None
42 host, port = netloc, None
43 else:
43 else:
44 host, port = netloc[:c], netloc[c+1:]
44 host, port = netloc[:c], netloc[c+1:]
45 return host, port, user, passwd
45 return host, port, user, passwd
46
46
47 def netlocunsplit(host, port, user=None, passwd=None):
47 def netlocunsplit(host, port, user=None, passwd=None):
48 '''turn host, port, user, passwd into [user[:passwd]@]host[:port].'''
48 '''turn host, port, user, passwd into [user[:passwd]@]host[:port].'''
49 if port:
49 if port:
50 hostport = host + ':' + port
50 hostport = host + ':' + port
51 else:
51 else:
52 hostport = host
52 hostport = host
53 if user:
53 if user:
54 if passwd:
54 if passwd:
55 userpass = urllib.quote(user) + ':' + urllib.quote(passwd)
55 userpass = urllib.quote(user) + ':' + urllib.quote(passwd)
56 else:
56 else:
57 userpass = urllib.quote(user)
57 userpass = urllib.quote(user)
58 return userpass + '@' + hostport
58 return userpass + '@' + hostport
59 return hostport
59 return hostport
60
60
61 _safe = ('abcdefghijklmnopqrstuvwxyz'
61 _safe = ('abcdefghijklmnopqrstuvwxyz'
62 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
62 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
63 '0123456789' '_.-/')
63 '0123456789' '_.-/')
64 _safeset = None
64 _safeset = None
65 _hex = None
65 _hex = None
66 def quotepath(path):
66 def quotepath(path):
67 '''quote the path part of a URL
67 '''quote the path part of a URL
68
68
69 This is similar to urllib.quote, but it also tries to avoid
69 This is similar to urllib.quote, but it also tries to avoid
70 quoting things twice (inspired by wget):
70 quoting things twice (inspired by wget):
71
71
72 >>> quotepath('abc def')
72 >>> quotepath('abc def')
73 'abc%20def'
73 'abc%20def'
74 >>> quotepath('abc%20def')
74 >>> quotepath('abc%20def')
75 'abc%20def'
75 'abc%20def'
76 >>> quotepath('abc%20 def')
76 >>> quotepath('abc%20 def')
77 'abc%20%20def'
77 'abc%20%20def'
78 >>> quotepath('abc def%20')
78 >>> quotepath('abc def%20')
79 'abc%20def%20'
79 'abc%20def%20'
80 >>> quotepath('abc def%2')
80 >>> quotepath('abc def%2')
81 'abc%20def%252'
81 'abc%20def%252'
82 >>> quotepath('abc def%')
82 >>> quotepath('abc def%')
83 'abc%20def%25'
83 'abc%20def%25'
84 '''
84 '''
85 global _safeset, _hex
85 global _safeset, _hex
86 if _safeset is None:
86 if _safeset is None:
87 _safeset = util.set(_safe)
87 _safeset = util.set(_safe)
88 _hex = util.set('abcdefABCDEF0123456789')
88 _hex = util.set('abcdefABCDEF0123456789')
89 l = list(path)
89 l = list(path)
90 for i in xrange(len(l)):
90 for i in xrange(len(l)):
91 c = l[i]
91 c = l[i]
92 if c == '%' and i + 2 < len(l) and (l[i+1] in _hex and l[i+2] in _hex):
92 if c == '%' and i + 2 < len(l) and (l[i+1] in _hex and l[i+2] in _hex):
93 pass
93 pass
94 elif c not in _safeset:
94 elif c not in _safeset:
95 l[i] = '%%%02X' % ord(c)
95 l[i] = '%%%02X' % ord(c)
96 return ''.join(l)
96 return ''.join(l)
97
97
98 class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
98 class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
99 def __init__(self, ui):
99 def __init__(self, ui):
100 urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
100 urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
101 self.ui = ui
101 self.ui = ui
102
102
103 def find_user_password(self, realm, authuri):
103 def find_user_password(self, realm, authuri):
104 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
104 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm.find_user_password(
105 self, realm, authuri)
105 self, realm, authuri)
106 user, passwd = authinfo
106 user, passwd = authinfo
107 if user and passwd:
107 if user and passwd:
108 return (user, passwd)
108 return (user, passwd)
109
109
110 if not self.ui.interactive:
110 if not self.ui.interactive:
111 raise util.Abort(_('http authorization required'))
111 raise util.Abort(_('http authorization required'))
112
112
113 self.ui.write(_("http authorization required\n"))
113 self.ui.write(_("http authorization required\n"))
114 self.ui.status(_("realm: %s\n") % realm)
114 self.ui.status(_("realm: %s\n") % realm)
115 if user:
115 if user:
116 self.ui.status(_("user: %s\n") % user)
116 self.ui.status(_("user: %s\n") % user)
117 else:
117 else:
118 user = self.ui.prompt(_("user:"), default=None)
118 user = self.ui.prompt(_("user:"), default=None)
119
119
120 if not passwd:
120 if not passwd:
121 passwd = self.ui.getpass()
121 passwd = self.ui.getpass()
122
122
123 self.add_password(realm, authuri, user, passwd)
123 self.add_password(realm, authuri, user, passwd)
124 return (user, passwd)
124 return (user, passwd)
125
125
126 class proxyhandler(urllib2.ProxyHandler):
126 class proxyhandler(urllib2.ProxyHandler):
127 def __init__(self, ui):
127 def __init__(self, ui):
128 proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
128 proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
129 # XXX proxyauthinfo = None
129 # XXX proxyauthinfo = None
130
130
131 if proxyurl:
131 if proxyurl:
132 # proxy can be proper url or host[:port]
132 # proxy can be proper url or host[:port]
133 if not (proxyurl.startswith('http:') or
133 if not (proxyurl.startswith('http:') or
134 proxyurl.startswith('https:')):
134 proxyurl.startswith('https:')):
135 proxyurl = 'http://' + proxyurl + '/'
135 proxyurl = 'http://' + proxyurl + '/'
136 snpqf = urlparse.urlsplit(proxyurl)
136 snpqf = urlparse.urlsplit(proxyurl)
137 proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
137 proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
138 hpup = netlocsplit(proxynetloc)
138 hpup = netlocsplit(proxynetloc)
139
139
140 proxyhost, proxyport, proxyuser, proxypasswd = hpup
140 proxyhost, proxyport, proxyuser, proxypasswd = hpup
141 if not proxyuser:
141 if not proxyuser:
142 proxyuser = ui.config("http_proxy", "user")
142 proxyuser = ui.config("http_proxy", "user")
143 proxypasswd = ui.config("http_proxy", "passwd")
143 proxypasswd = ui.config("http_proxy", "passwd")
144
144
145 # see if we should use a proxy for this url
145 # see if we should use a proxy for this url
146 no_list = [ "localhost", "127.0.0.1" ]
146 no_list = [ "localhost", "127.0.0.1" ]
147 no_list.extend([p.lower() for
147 no_list.extend([p.lower() for
148 p in ui.configlist("http_proxy", "no")])
148 p in ui.configlist("http_proxy", "no")])
149 no_list.extend([p.strip().lower() for
149 no_list.extend([p.strip().lower() for
150 p in os.getenv("no_proxy", '').split(',')
150 p in os.getenv("no_proxy", '').split(',')
151 if p.strip()])
151 if p.strip()])
152 # "http_proxy.always" config is for running tests on localhost
152 # "http_proxy.always" config is for running tests on localhost
153 if ui.configbool("http_proxy", "always"):
153 if ui.configbool("http_proxy", "always"):
154 self.no_list = []
154 self.no_list = []
155 else:
155 else:
156 self.no_list = no_list
156 self.no_list = no_list
157
157
158 proxyurl = urlparse.urlunsplit((
158 proxyurl = urlparse.urlunsplit((
159 proxyscheme, netlocunsplit(proxyhost, proxyport,
159 proxyscheme, netlocunsplit(proxyhost, proxyport,
160 proxyuser, proxypasswd or ''),
160 proxyuser, proxypasswd or ''),
161 proxypath, proxyquery, proxyfrag))
161 proxypath, proxyquery, proxyfrag))
162 proxies = {'http': proxyurl, 'https': proxyurl}
162 proxies = {'http': proxyurl, 'https': proxyurl}
163 ui.debug(_('proxying through http://%s:%s\n') %
163 ui.debug(_('proxying through http://%s:%s\n') %
164 (proxyhost, proxyport))
164 (proxyhost, proxyport))
165 else:
165 else:
166 proxies = {}
166 proxies = {}
167
167
168 # urllib2 takes proxy values from the environment and those
168 # urllib2 takes proxy values from the environment and those
169 # will take precedence if found, so drop them
169 # will take precedence if found, so drop them
170 for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
170 for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
171 try:
171 try:
172 if env in os.environ:
172 if env in os.environ:
173 del os.environ[env]
173 del os.environ[env]
174 except OSError:
174 except OSError:
175 pass
175 pass
176
176
177 urllib2.ProxyHandler.__init__(self, proxies)
177 urllib2.ProxyHandler.__init__(self, proxies)
178 self.ui = ui
178 self.ui = ui
179
179
180 def proxy_open(self, req, proxy, type_):
180 def proxy_open(self, req, proxy, type_):
181 host = req.get_host().split(':')[0]
181 host = req.get_host().split(':')[0]
182 if host in self.no_list:
182 if host in self.no_list:
183 return None
183 return None
184
184
185 # work around a bug in Python < 2.4.2
185 # work around a bug in Python < 2.4.2
186 # (it leaves a "\n" at the end of Proxy-authorization headers)
186 # (it leaves a "\n" at the end of Proxy-authorization headers)
187 baseclass = req.__class__
187 baseclass = req.__class__
188 class _request(baseclass):
188 class _request(baseclass):
189 def add_header(self, key, val):
189 def add_header(self, key, val):
190 if key.lower() == 'proxy-authorization':
190 if key.lower() == 'proxy-authorization':
191 val = val.strip()
191 val = val.strip()
192 return baseclass.add_header(self, key, val)
192 return baseclass.add_header(self, key, val)
193 req.__class__ = _request
193 req.__class__ = _request
194
194
195 return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
195 return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
196
196
197 class httpsendfile(file):
197 class httpsendfile(file):
198 def __len__(self):
198 def __len__(self):
199 return os.fstat(self.fileno()).st_size
199 return os.fstat(self.fileno()).st_size
200
200
201 def _gen_sendfile(connection):
201 def _gen_sendfile(connection):
202 def _sendfile(self, data):
202 def _sendfile(self, data):
203 # send a file
203 # send a file
204 if isinstance(data, httpsendfile):
204 if isinstance(data, httpsendfile):
205 # if auth required, some data sent twice, so rewind here
205 # if auth required, some data sent twice, so rewind here
206 data.seek(0)
206 data.seek(0)
207 for chunk in util.filechunkiter(data):
207 for chunk in util.filechunkiter(data):
208 connection.send(self, chunk)
208 connection.send(self, chunk)
209 else:
209 else:
210 connection.send(self, data)
210 connection.send(self, data)
211 return _sendfile
211 return _sendfile
212
212
213 class httpconnection(keepalive.HTTPConnection):
213 class httpconnection(keepalive.HTTPConnection):
214 # must be able to send big bundle as stream.
214 # must be able to send big bundle as stream.
215 send = _gen_sendfile(keepalive.HTTPConnection)
215 send = _gen_sendfile(keepalive.HTTPConnection)
216
216
217 class httphandler(keepalive.HTTPHandler):
217 class httphandler(keepalive.HTTPHandler):
218 def http_open(self, req):
218 def http_open(self, req):
219 return self.do_open(httpconnection, req)
219 return self.do_open(httpconnection, req)
220
220
221 def __del__(self):
221 def __del__(self):
222 self.close_all()
222 self.close_all()
223
223
224 has_https = hasattr(urllib2, 'HTTPSHandler')
224 has_https = hasattr(urllib2, 'HTTPSHandler')
225 if has_https:
225 if has_https:
226 class httpsconnection(httplib.HTTPSConnection):
226 class httpsconnection(httplib.HTTPSConnection):
227 response_class = keepalive.HTTPResponse
227 response_class = keepalive.HTTPResponse
228 # must be able to send big bundle as stream.
228 # must be able to send big bundle as stream.
229 send = _gen_sendfile(httplib.HTTPSConnection)
229 send = _gen_sendfile(httplib.HTTPSConnection)
230
230
231 class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
231 class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
232 def https_open(self, req):
232 def https_open(self, req):
233 return self.do_open(httpsconnection, req)
233 return self.do_open(httpsconnection, req)
234
234
235 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
235 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
236 # it doesn't know about the auth type requested. This can happen if
236 # it doesn't know about the auth type requested. This can happen if
237 # somebody is using BasicAuth and types a bad password.
237 # somebody is using BasicAuth and types a bad password.
238 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
238 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
239 def http_error_auth_reqed(self, auth_header, host, req, headers):
239 def http_error_auth_reqed(self, auth_header, host, req, headers):
240 try:
240 try:
241 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
241 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
242 self, auth_header, host, req, headers)
242 self, auth_header, host, req, headers)
243 except ValueError, inst:
243 except ValueError, inst:
244 arg = inst.args[0]
244 arg = inst.args[0]
245 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
245 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
246 return
246 return
247 raise
247 raise
248
248
249 def getauthinfo(path):
249 def getauthinfo(path):
250 scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
250 scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
251 if not urlpath:
251 if not urlpath:
252 urlpath = '/'
252 urlpath = '/'
253 if scheme != 'file':
253 if scheme != 'file':
254 # XXX: why are we quoting the path again with some smart
254 # XXX: why are we quoting the path again with some smart
255 # heuristic here? Anyway, it cannot be done with file://
255 # heuristic here? Anyway, it cannot be done with file://
256 # urls since path encoding is os/fs dependent (see
256 # urls since path encoding is os/fs dependent (see
257 # urllib.pathname2url() for details).
257 # urllib.pathname2url() for details).
258 urlpath = quotepath(urlpath)
258 urlpath = quotepath(urlpath)
259 host, port, user, passwd = netlocsplit(netloc)
259 host, port, user, passwd = netlocsplit(netloc)
260
260
261 # urllib cannot handle URLs with embedded user or passwd
261 # urllib cannot handle URLs with embedded user or passwd
262 url = urlparse.urlunsplit((scheme, netlocunsplit(host, port),
262 url = urlparse.urlunsplit((scheme, netlocunsplit(host, port),
263 urlpath, query, frag))
263 urlpath, query, frag))
264 if user:
264 if user:
265 netloc = host
265 netloc = host
266 if port:
266 if port:
267 netloc += ':' + port
267 netloc += ':' + port
268 # Python < 2.4.3 uses only the netloc to search for a password
268 # Python < 2.4.3 uses only the netloc to search for a password
269 authinfo = (None, (url, netloc), user, passwd or '')
269 authinfo = (None, (url, netloc), user, passwd or '')
270 else:
270 else:
271 authinfo = None
271 authinfo = None
272 return url, authinfo
272 return url, authinfo
273
273
274 def opener(ui, authinfo=None):
274 def opener(ui, authinfo=None):
275 '''
275 '''
276 construct an opener suitable for urllib2
276 construct an opener suitable for urllib2
277 authinfo will be added to the password manager
277 authinfo will be added to the password manager
278 '''
278 '''
279 handlers = [httphandler()]
279 handlers = [httphandler()]
280 if has_https:
280 if has_https:
281 handlers.append(httpshandler())
281 handlers.append(httpshandler())
282
282
283 handlers.append(proxyhandler(ui))
283 handlers.append(proxyhandler(ui))
284
284
285 passmgr = passwordmgr(ui)
285 passmgr = passwordmgr(ui)
286 if authinfo is not None:
286 if authinfo is not None:
287 passmgr.add_password(*authinfo)
287 passmgr.add_password(*authinfo)
288 user, passwd = authinfo[2:4]
288 user, passwd = authinfo[2:4]
289 ui.debug(_('http auth: user %s, password %s\n') %
289 ui.debug(_('http auth: user %s, password %s\n') %
290 (user, passwd and '*' * len(passwd) or 'not set'))
290 (user, passwd and '*' * len(passwd) or 'not set'))
291
291
292 handlers.extend((urllib2.HTTPBasicAuthHandler(passmgr),
292 handlers.extend((urllib2.HTTPBasicAuthHandler(passmgr),
293 httpdigestauthhandler(passmgr)))
293 httpdigestauthhandler(passmgr)))
294 opener = urllib2.build_opener(*handlers)
294 opener = urllib2.build_opener(*handlers)
295
295
296 # 1.0 here is the _protocol_ version
296 # 1.0 here is the _protocol_ version
297 opener.addheaders = [('User-agent', 'mercurial/proto-1.0')]
297 opener.addheaders = [('User-agent', 'mercurial/proto-1.0')]
298 opener.addheaders.append(('Accept', 'application/mercurial-0.1'))
298 opener.addheaders.append(('Accept', 'application/mercurial-0.1'))
299 return opener
299 return opener
300
300
301 scheme_re = re.compile(r'^([a-zA-Z0-9+-.]+)://')
302
301 def open(ui, url, data=None):
303 def open(ui, url, data=None):
302 scheme = urlparse.urlsplit(url)[0]
304 scheme = None
305 m = scheme_re.search(url)
306 if m:
307 scheme = m.group(1).lower()
303 if not scheme:
308 if not scheme:
304 path = util.normpath(os.path.abspath(url))
309 path = util.normpath(os.path.abspath(url))
305 url = 'file://' + urllib.pathname2url(path)
310 url = 'file://' + urllib.pathname2url(path)
306 authinfo = None
311 authinfo = None
307 else:
312 else:
308 url, authinfo = getauthinfo(url)
313 url, authinfo = getauthinfo(url)
309 return opener(ui, authinfo).open(url, data)
314 return opener(ui, authinfo).open(url, data)
General Comments 0
You need to be logged in to leave comments. Login now