##// END OF EJS Templates
url: provide url object...
Brodie Rao -
r13770:4e8f2310 default
parent child Browse files
Show More
@@ -23,6 +23,198 b' def _urlunparse(scheme, netloc, path, pa'
23 23 result = scheme + '://' + result[len(scheme + ':'):]
24 24 return result
25 25
class url(object):
    """Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parse_fragment is False, fragment is included in query. If
    parse_query is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parse_query=False, parse_fragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # Characters left unescaped when quoting the user name and password.
    _safechars = "!~*'()+"
    # Same set plus '/', used when quoting path, query and fragment.
    _safepchars = "/!~*'()+"

    def __init__(self, path, parse_query=True, parse_fragment=True):
        """Split path into its URL components.

        A string that starts with '/' or has no non-empty scheme before
        the first ':' is treated as a local path: only the fragment is
        split off (when parse_fragment is set) and nothing is unquoted.

        parse_query and parse_fragment select whether '?' and '#' act as
        delimiters; when False, that text stays in the preceding
        component.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True

        if not path.startswith('/') and ':' in path:
            parts = path.split(':', 1)
            # An empty scheme (e.g. '://host') is not a scheme at all
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parse_fragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            if self._localpath:
                self.path = path
                return

            if parse_query and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # No authority (e.g. file:///x): the slash that
                    # ended the empty authority belongs to the path
                    if path:
                        path = '/' + path

            if self.host and '@' in self.host:
                # rsplit so that stray '@' characters stay in the
                # credentials rather than in the host
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None
        self.path = path

        # Components are stored unquoted; __str__ quotes them again
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urllib.unquote(v))

    def __repr__(self):
        """Return '<url name: value, ...>' listing the non-None components."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        """Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle:foo#bar'))
        'bundle:foo#bar'
        >>> str(url('file:///foo#bar'))
        'file:/foo#bar'
        >>> str(url('path'))
        'path'
        >>> str(url('#frag'))
        '#frag'
        """
        if self._localpath:
            # path is None when the input was only a fragment ('#frag')
            s = self.path or ''
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            self.scheme and not self.path):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # Bracketed IPv6 literals must be emitted verbatim
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        # The separating '/' only makes sense after an authority; without
        # a host it would corrupt the path (bundle:foo#bar must not
        # become bundle:/foo#bar)
        if self.host and (self.path is not None or self.query or
                          self.fragment):
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return the URL without credentials, plus the credentials.

        The result is (s, None) when no user is set, otherwise
        (s, (None, (s, host), user, passwd or '')), where s is the URL
        string with user and password removed.
        """
        user, passwd = self.user, self.passwd
        try:
            # Temporarily drop the credentials so str() omits them
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # The URI list must not contain the credentials themselves, so
        # reuse the stripped string rather than str(self)
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))
214
def has_scheme(path):
    """Report whether path parses as a URL with a non-empty scheme."""
    parsed = url(path)
    if parsed.scheme:
        return True
    return False
217
26 218 def hidepassword(url):
27 219 '''hide user credential in a url string'''
28 220 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
@@ -49,6 +49,142 b" check(_verifycert({'subject': ()},"
49 49 check(_verifycert(None, 'example.com'),
50 50 'no certificate received')
51 51
52 import doctest
53
def test_url():
    """Doctests for edge cases of the mercurial.url.url class.

    >>> from mercurial.url import url

    This tests for edge cases in the url class's parsing algorithm.
    Most of these aren't useful for documentation purposes, so they
    aren't part of the class's doc tests.

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?')
    <url scheme: 'http', host: 'host', path: 'a'>
    >>> url('http://host/a#b#c')
    <url scheme: 'http', host: 'host', path: 'a', fragment: 'b#c'>
    >>> url('http://host/a#b?c')
    <url scheme: 'http', host: 'host', path: 'a', fragment: 'b?c'>
    >>> url('http://host/?a#b')
    <url scheme: 'http', host: 'host', path: '', query: 'a', fragment: 'b'>
    >>> url('http://host/?a#b', parse_query=False)
    <url scheme: 'http', host: 'host', path: '?a', fragment: 'b'>
    >>> url('http://host/?a#b', parse_fragment=False)
    <url scheme: 'http', host: 'host', path: '', query: 'a#b'>
    >>> url('http://host/?a#b', parse_query=False, parse_fragment=False)
    <url scheme: 'http', host: 'host', path: '?a#b'>

    IPv6 addresses:

    >>> url('ldap://[2001:db8::7]/c=GB?objectClass?one')
    <url scheme: 'ldap', host: '[2001:db8::7]', path: 'c=GB',
         query: 'objectClass?one'>
    >>> url('ldap://joe:xxx@[2001:db8::7]:80/c=GB?objectClass?one')
    <url scheme: 'ldap', user: 'joe', passwd: 'xxx', host: '[2001:db8::7]',
         port: '80', path: 'c=GB', query: 'objectClass?one'>

    Missing scheme, host, etc.:

    >>> url('://192.0.2.16:80/')
    <url path: '://192.0.2.16:80/'>
    >>> url('http://mercurial.selenic.com')
    <url scheme: 'http', host: 'mercurial.selenic.com'>
    >>> url('/foo')
    <url path: '/foo'>
    >>> url('bundle:/foo')
    <url scheme: 'bundle', path: '/foo'>
    >>> url('a?b#c')
    <url path: 'a?b', fragment: 'c'>
    >>> url('http://x.com?arg=/foo')
    <url scheme: 'http', host: 'x.com', query: 'arg=/foo'>
    >>> url('http://joe:xxx@/foo')
    <url scheme: 'http', user: 'joe', passwd: 'xxx', path: 'foo'>

    Just a scheme and a path:

    >>> url('mailto:John.Doe@example.com')
    <url scheme: 'mailto', path: 'John.Doe@example.com'>
    >>> url('a:b:c:d')
    <url scheme: 'a', path: 'b:c:d'>

    SSH examples:

    >>> url('ssh://joe@host//home/joe')
    <url scheme: 'ssh', user: 'joe', host: 'host', path: '/home/joe'>
    >>> url('ssh://joe:xxx@host/src')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host', path: 'src'>
    >>> url('ssh://joe:xxx@host')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host'>
    >>> url('ssh://joe@host')
    <url scheme: 'ssh', user: 'joe', host: 'host'>
    >>> url('ssh://host')
    <url scheme: 'ssh', host: 'host'>
    >>> url('ssh://')
    <url scheme: 'ssh'>
    >>> url('ssh:')
    <url scheme: 'ssh'>

    Non-numeric port:

    >>> url('http://example.com:dd')
    <url scheme: 'http', host: 'example.com', port: 'dd'>
    >>> url('ssh://joe:xxx@host:ssh/foo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host', port: 'ssh',
         path: 'foo'>

    Bad authentication credentials:

    >>> url('http://joe@joeville:123@4:@host/a?b#c')
    <url scheme: 'http', user: 'joe@joeville', passwd: '123@4:',
         host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://!*#?/@!*#?/:@host/a?b#c')
    <url scheme: 'http', host: '!*', fragment: '?/@!*#?/:@host/a?b#c'>
    >>> url('http://!*#?@!*#?:@host/a?b#c')
    <url scheme: 'http', host: '!*', fragment: '?@!*#?:@host/a?b#c'>
    >>> url('http://!*@:!*@@host/a?b#c')
    <url scheme: 'http', user: '!*@', passwd: '!*@', host: 'host',
         path: 'a', query: 'b', fragment: 'c'>

    File paths:

    >>> url('a/b/c/d.g.f')
    <url path: 'a/b/c/d.g.f'>
    >>> url('/x///z/y/')
    <url path: '/x///z/y/'>

    Empty URL:

    >>> u = url('')
    >>> u
    <url path: ''>
    >>> str(u)
    ''

    Empty path with query string:

    >>> str(url('http://foo/?bar'))
    'http://foo/?bar'

    Invalid path:

    >>> u = url('http://foo/bar')
    >>> u.path = 'bar'
    >>> str(u)
    'http://foo/bar'

    >>> u = url('file:///foo/bar/baz')
    >>> u
    <url scheme: 'file', path: '/foo/bar/baz'>
    >>> str(u)
    'file:/foo/bar/baz'
    """
185
# Run the doctests in this module. NORMALIZE_WHITESPACE lets the long
# expected reprs in test_url wrap across several lines.
doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
187
52 188 # Unicode (IDN) certname isn't supported
53 189 check(_verifycert(cert(u'\u4f8b.jp'), 'example.jp'),
54 190 'IDN in certificate not supported')
General Comments 0
You need to be logged in to leave comments. Login now