Show More
@@ -23,6 +23,198 b' def _urlunparse(scheme, netloc, path, pa' | |||||
23 | result = scheme + '://' + result[len(scheme + ':'):] |
|
23 | result = scheme + '://' + result[len(scheme + ':'):] | |
24 | return result |
|
24 | return result | |
25 |
|
25 | |||
|
class url(object):
    """Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parse_fragment is False, fragment is included in query. If
    parse_query is False, query is included in path. If both are
    False, both fragment and query are included in path.

    A string that starts with '/' or has no non-empty scheme before its
    first ':' is treated as a local path: only the fragment is split off
    (when parse_fragment is True) and the rest is stored in path
    verbatim, without percent-decoding.

    For all other URLs every component except scheme is percent-decoded
    while parsing and percent-encoded again by str().

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parse_query=False, parse_fragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # Characters left unescaped when quoting user/passwd ...
    _safechars = "!~*'()+"
    # ... and when quoting path/query/fragment ('/' is additionally kept).
    _safepchars = "/!~*'()+"

    def __init__(self, path, parse_query=True, parse_fragment=True):
        """Parse the string path into URL components.

        parse_query and parse_fragment control whether '?' and '#'
        start a query/fragment or are left in the preceding component.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True

        # A leading '/' or an empty scheme (':...') means a local path.
        if not path.startswith('/') and ':' in path:
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parse_fragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            if self._localpath:
                # Local paths keep '?' and '//' verbatim and are returned
                # before the unquoting pass below.
                self.path = path
                return

            if parse_query and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    # 'scheme:///x': empty authority, so the path is
                    # absolute and gets its leading slash back.
                    self.host = None
                    if path:
                        path = '/' + path

            if self.host and '@' in self.host:
                # Split on the last '@' so credentials may contain '@'.
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None
        self.path = path

        # Store every parsed component (except scheme) percent-decoded.
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urllib.unquote(v))

    def __repr__(self):
        """Return '<url name: value, ...>' listing the non-None components."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        """Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle:foo#frag'))
        'bundle:foo#frag'
        >>> str(url('path'))
        'path'
        """
        if self._localpath:
            # Local paths were stored unquoted-as-given; emit them as-is.
            s = self.path
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            self.scheme and not self.path):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # Bracketed IPv6 literals must not have '[', ':' or ']' escaped.
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        # The '/' only separates an authority from what follows. Without a
        # host it must not be emitted: it would turn 'bundle:foo#frag' into
        # 'bundle:/foo#frag' and 'file:/foo#bar' into 'file://foo#bar'.
        if self.host and (self.path is not None or
                          self.query or self.fragment):
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url, authinfo) with credentials split out of the URL.

        url is str(self) rendered without user and passwd. authinfo is
        None when there is no user; otherwise it is the tuple
        (None, (url, host), user, passwd) where passwd defaults to ''.
        """
        user, passwd = self.user, self.passwd
        try:
            # Temporarily drop the credentials to render a clean URL.
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # The URI list must not embed 'user:passwd@'; reuse the
        # credential-free string instead of re-rendering the full URL.
        # NOTE(review): presumably these are add_password() arguments for a
        # urllib2 password manager -- confirm against the callers.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))
|
214 | ||||
|
def has_scheme(path):
    """Tell whether path, parsed as a url, carries a non-empty scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
|
217 | ||||
26 | def hidepassword(url): |
|
218 | def hidepassword(url): | |
27 | '''hide user credential in a url string''' |
|
219 | '''hide user credential in a url string''' | |
28 | scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) |
|
220 | scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) |
@@ -49,6 +49,142 b" check(_verifycert({'subject': ()}," | |||||
49 | check(_verifycert(None, 'example.com'), |
|
49 | check(_verifycert(None, 'example.com'), | |
50 | 'no certificate received') |
|
50 | 'no certificate received') | |
51 |
|
51 | |||
|
52 | import doctest | |||
|
53 | ||||
|
def test_url():
    """Doctest container for url parsing and formatting edge cases.

    >>> from mercurial.url import url

    This tests for edge cases in url.url's parsing algorithm. Most of
    these aren't useful for documentation purposes, so they aren't
    part of the class's doc tests.

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?')
    <url scheme: 'http', host: 'host', path: 'a'>
    >>> url('http://host/a#b#c')
    <url scheme: 'http', host: 'host', path: 'a', fragment: 'b#c'>
    >>> url('http://host/a#b?c')
    <url scheme: 'http', host: 'host', path: 'a', fragment: 'b?c'>
    >>> url('http://host/?a#b')
    <url scheme: 'http', host: 'host', path: '', query: 'a', fragment: 'b'>
    >>> url('http://host/?a#b', parse_query=False)
    <url scheme: 'http', host: 'host', path: '?a', fragment: 'b'>
    >>> url('http://host/?a#b', parse_fragment=False)
    <url scheme: 'http', host: 'host', path: '', query: 'a#b'>
    >>> url('http://host/?a#b', parse_query=False, parse_fragment=False)
    <url scheme: 'http', host: 'host', path: '?a#b'>

    IPv6 addresses:

    >>> url('ldap://[2001:db8::7]/c=GB?objectClass?one')
    <url scheme: 'ldap', host: '[2001:db8::7]', path: 'c=GB',
         query: 'objectClass?one'>
    >>> url('ldap://joe:xxx@[2001:db8::7]:80/c=GB?objectClass?one')
    <url scheme: 'ldap', user: 'joe', passwd: 'xxx', host: '[2001:db8::7]',
         port: '80', path: 'c=GB', query: 'objectClass?one'>

    Missing scheme, host, etc.:

    >>> url('://192.0.2.16:80/')
    <url path: '://192.0.2.16:80/'>
    >>> url('http://mercurial.selenic.com')
    <url scheme: 'http', host: 'mercurial.selenic.com'>
    >>> url('/foo')
    <url path: '/foo'>
    >>> url('bundle:/foo')
    <url scheme: 'bundle', path: '/foo'>
    >>> url('a?b#c')
    <url path: 'a?b', fragment: 'c'>
    >>> url('http://x.com?arg=/foo')
    <url scheme: 'http', host: 'x.com', query: 'arg=/foo'>
    >>> url('http://joe:xxx@/foo')
    <url scheme: 'http', user: 'joe', passwd: 'xxx', path: 'foo'>

    Just a scheme and a path:

    >>> url('mailto:John.Doe@example.com')
    <url scheme: 'mailto', path: 'John.Doe@example.com'>
    >>> url('a:b:c:d')
    <url scheme: 'a', path: 'b:c:d'>

    SSH examples:

    >>> url('ssh://joe@host//home/joe')
    <url scheme: 'ssh', user: 'joe', host: 'host', path: '/home/joe'>
    >>> url('ssh://joe:xxx@host/src')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host', path: 'src'>
    >>> url('ssh://joe:xxx@host')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host'>
    >>> url('ssh://joe@host')
    <url scheme: 'ssh', user: 'joe', host: 'host'>
    >>> url('ssh://host')
    <url scheme: 'ssh', host: 'host'>
    >>> url('ssh://')
    <url scheme: 'ssh'>
    >>> url('ssh:')
    <url scheme: 'ssh'>

    Non-numeric port:

    >>> url('http://example.com:dd')
    <url scheme: 'http', host: 'example.com', port: 'dd'>
    >>> url('ssh://joe:xxx@host:ssh/foo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host', port: 'ssh',
         path: 'foo'>

    Bad authentication credentials:

    >>> url('http://joe@joeville:123@4:@host/a?b#c')
    <url scheme: 'http', user: 'joe@joeville', passwd: '123@4:',
         host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://!*#?/@!*#?/:@host/a?b#c')
    <url scheme: 'http', host: '!*', fragment: '?/@!*#?/:@host/a?b#c'>
    >>> url('http://!*#?@!*#?:@host/a?b#c')
    <url scheme: 'http', host: '!*', fragment: '?@!*#?:@host/a?b#c'>
    >>> url('http://!*@:!*@@host/a?b#c')
    <url scheme: 'http', user: '!*@', passwd: '!*@', host: 'host',
         path: 'a', query: 'b', fragment: 'c'>

    File paths:

    >>> url('a/b/c/d.g.f')
    <url path: 'a/b/c/d.g.f'>
    >>> url('/x///z/y/')
    <url path: '/x///z/y/'>

    Empty URL:

    >>> u = url('')
    >>> u
    <url path: ''>
    >>> str(u)
    ''

    Empty path with query string:

    >>> str(url('http://foo/?bar'))
    'http://foo/?bar'

    Invalid path:

    >>> u = url('http://foo/bar')
    >>> u.path = 'bar'
    >>> str(u)
    'http://foo/bar'

    >>> u = url('file:///foo/bar/baz')
    >>> u
    <url scheme: 'file', path: '/foo/bar/baz'>
    >>> str(u)
    'file:/foo/bar/baz'
    """

# Run the doctests above; NORMALIZE_WHITESPACE lets the long expected reprs
# be wrapped across several lines.
doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
|
187 | ||||
52 | # Unicode (IDN) certname isn't supported |
|
188 | # Unicode (IDN) certname isn't supported | |
53 | check(_verifycert(cert(u'\u4f8b.jp'), 'example.jp'), |
|
189 | check(_verifycert(cert(u'\u4f8b.jp'), 'example.jp'), | |
54 | 'IDN in certificate not supported') |
|
190 | 'IDN in certificate not supported') |
General Comments 0
You need to be logged in to leave comments.
Login now