Show More
@@ -23,6 +23,198 b' def _urlunparse(scheme, netloc, path, pa' | |||
|
23 | 23 | result = scheme + '://' + result[len(scheme + ':'):] |
|
24 | 24 | return result |
|
25 | 25 | |
|
26 | class url(object): | |
|
27 | """Reliable URL parser. | |
|
28 | ||
|
29 | This parses URLs and provides attributes for the following | |
|
30 | components: | |
|
31 | ||
|
32 | <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment> | |
|
33 | ||
|
34 | Missing components are set to None. The only exception is | |
|
35 | fragment, which is set to '' if present but empty. | |
|
36 | ||
|
37 | If parse_fragment is False, fragment is included in query. If | |
|
38 | parse_query is False, query is included in path. If both are | |
|
39 | False, both fragment and query are included in path. | |
|
40 | ||
|
41 | See http://www.ietf.org/rfc/rfc2396.txt for more information. | |
|
42 | ||
|
43 | Examples: | |
|
44 | ||
|
45 | >>> url('http://www.ietf.org/rfc/rfc2396.txt') | |
|
46 | <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'> | |
|
47 | >>> url('ssh://[::1]:2200//home/joe/repo') | |
|
48 | <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'> | |
|
49 | >>> url('file:///home/joe/repo') | |
|
50 | <url scheme: 'file', path: '/home/joe/repo'> | |
|
51 | >>> url('bundle:foo') | |
|
52 | <url scheme: 'bundle', path: 'foo'> | |
|
53 | ||
|
54 | Authentication credentials: | |
|
55 | ||
|
56 | >>> url('ssh://joe:xyz@x/repo') | |
|
57 | <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'> | |
|
58 | >>> url('ssh://joe@x/repo') | |
|
59 | <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'> | |
|
60 | ||
|
61 | Query strings and fragments: | |
|
62 | ||
|
63 | >>> url('http://host/a?b#c') | |
|
64 | <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'> | |
|
65 | >>> url('http://host/a?b#c', parse_query=False, parse_fragment=False) | |
|
66 | <url scheme: 'http', host: 'host', path: 'a?b#c'> | |
|
67 | """ | |
|
68 | ||
|
69 | _safechars = "!~*'()+" | |
|
70 | _safepchars = "/!~*'()+" | |
|
71 | ||
|
72 | def __init__(self, path, parse_query=True, parse_fragment=True): | |
|
73 | # We slowly chomp away at path until we have only the path left | |
|
74 | self.scheme = self.user = self.passwd = self.host = None | |
|
75 | self.port = self.path = self.query = self.fragment = None | |
|
76 | self._localpath = True | |
|
77 | ||
|
78 | if not path.startswith('/') and ':' in path: | |
|
79 | parts = path.split(':', 1) | |
|
80 | if parts[0]: | |
|
81 | self.scheme, path = parts | |
|
82 | self._localpath = False | |
|
83 | ||
|
84 | if not path: | |
|
85 | path = None | |
|
86 | if self._localpath: | |
|
87 | self.path = '' | |
|
88 | return | |
|
89 | else: | |
|
90 | if parse_fragment and '#' in path: | |
|
91 | path, self.fragment = path.split('#', 1) | |
|
92 | if not path: | |
|
93 | path = None | |
|
94 | if self._localpath: | |
|
95 | self.path = path | |
|
96 | return | |
|
97 | ||
|
98 | if parse_query and '?' in path: | |
|
99 | path, self.query = path.split('?', 1) | |
|
100 | if not path: | |
|
101 | path = None | |
|
102 | if not self.query: | |
|
103 | self.query = None | |
|
104 | ||
|
105 | # // is required to specify a host/authority | |
|
106 | if path and path.startswith('//'): | |
|
107 | parts = path[2:].split('/', 1) | |
|
108 | if len(parts) > 1: | |
|
109 | self.host, path = parts | |
|
110 | path = path | |
|
111 | else: | |
|
112 | self.host = parts[0] | |
|
113 | path = None | |
|
114 | if not self.host: | |
|
115 | self.host = None | |
|
116 | if path: | |
|
117 | path = '/' + path | |
|
118 | ||
|
119 | if self.host and '@' in self.host: | |
|
120 | self.user, self.host = self.host.rsplit('@', 1) | |
|
121 | if ':' in self.user: | |
|
122 | self.user, self.passwd = self.user.split(':', 1) | |
|
123 | if not self.host: | |
|
124 | self.host = None | |
|
125 | ||
|
126 | # Don't split on colons in IPv6 addresses without ports | |
|
127 | if (self.host and ':' in self.host and | |
|
128 | not (self.host.startswith('[') and self.host.endswith(']'))): | |
|
129 | self.host, self.port = self.host.rsplit(':', 1) | |
|
130 | if not self.host: | |
|
131 | self.host = None | |
|
132 | self.path = path | |
|
133 | ||
|
134 | for a in ('user', 'passwd', 'host', 'port', | |
|
135 | 'path', 'query', 'fragment'): | |
|
136 | v = getattr(self, a) | |
|
137 | if v is not None: | |
|
138 | setattr(self, a, urllib.unquote(v)) | |
|
139 | ||
|
140 | def __repr__(self): | |
|
141 | attrs = [] | |
|
142 | for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path', | |
|
143 | 'query', 'fragment'): | |
|
144 | v = getattr(self, a) | |
|
145 | if v is not None: | |
|
146 | attrs.append('%s: %r' % (a, v)) | |
|
147 | return '<url %s>' % ', '.join(attrs) | |
|
148 | ||
|
149 | def __str__(self): | |
|
150 | """Join the URL's components back into a URL string. | |
|
151 | ||
|
152 | Examples: | |
|
153 | ||
|
154 | >>> str(url('http://user:pw@host:80/?foo#bar')) | |
|
155 | 'http://user:pw@host:80/?foo#bar' | |
|
156 | >>> str(url('ssh://user:pw@[::1]:2200//home/joe#')) | |
|
157 | 'ssh://user:pw@[::1]:2200//home/joe#' | |
|
158 | >>> str(url('http://localhost:80//')) | |
|
159 | 'http://localhost:80//' | |
|
160 | >>> str(url('http://localhost:80/')) | |
|
161 | 'http://localhost:80/' | |
|
162 | >>> str(url('http://localhost:80')) | |
|
163 | 'http://localhost:80' | |
|
164 | >>> str(url('bundle:foo')) | |
|
165 | 'bundle:foo' | |
|
166 | >>> str(url('path')) | |
|
167 | 'path' | |
|
168 | """ | |
|
169 | if self._localpath: | |
|
170 | s = self.path | |
|
171 | if self.fragment: | |
|
172 | s += '#' + self.fragment | |
|
173 | return s | |
|
174 | ||
|
175 | s = self.scheme + ':' | |
|
176 | if (self.user or self.passwd or self.host or | |
|
177 | self.scheme and not self.path): | |
|
178 | s += '//' | |
|
179 | if self.user: | |
|
180 | s += urllib.quote(self.user, safe=self._safechars) | |
|
181 | if self.passwd: | |
|
182 | s += ':' + urllib.quote(self.passwd, safe=self._safechars) | |
|
183 | if self.user or self.passwd: | |
|
184 | s += '@' | |
|
185 | if self.host: | |
|
186 | if not (self.host.startswith('[') and self.host.endswith(']')): | |
|
187 | s += urllib.quote(self.host) | |
|
188 | else: | |
|
189 | s += self.host | |
|
190 | if self.port: | |
|
191 | s += ':' + urllib.quote(self.port) | |
|
192 | if ((self.host and self.path is not None) or | |
|
193 | (self.host and self.query or self.fragment)): | |
|
194 | s += '/' | |
|
195 | if self.path: | |
|
196 | s += urllib.quote(self.path, safe=self._safepchars) | |
|
197 | if self.query: | |
|
198 | s += '?' + urllib.quote(self.query, safe=self._safepchars) | |
|
199 | if self.fragment is not None: | |
|
200 | s += '#' + urllib.quote(self.fragment, safe=self._safepchars) | |
|
201 | return s | |
|
202 | ||
|
203 | def authinfo(self): | |
|
204 | user, passwd = self.user, self.passwd | |
|
205 | try: | |
|
206 | self.user, self.passwd = None, None | |
|
207 | s = str(self) | |
|
208 | finally: | |
|
209 | self.user, self.passwd = user, passwd | |
|
210 | if not self.user: | |
|
211 | return (s, None) | |
|
212 | return (s, (None, (str(self), self.host), | |
|
213 | self.user, self.passwd or '')) | |
|
214 | ||
|
def has_scheme(path):
    """Return True if ``path`` parses to a URL with a non-empty scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
|
217 | ||
|
26 | 218 | def hidepassword(url): |
|
27 | 219 | '''hide user credential in a url string''' |
|
28 | 220 | scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) |
@@ -49,6 +49,142 b" check(_verifycert({'subject': ()}," | |||
|
49 | 49 | check(_verifycert(None, 'example.com'), |
|
50 | 50 | 'no certificate received') |
|
51 | 51 | |
|
52 | import doctest | |
|
53 | ||
|
def test_url():
    """
    >>> from mercurial.url import url

    This tests for edge cases in url.url's parsing algorithm. Most of
    these aren't useful for documentation purposes, so they aren't
    part of the class's doc tests.

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?')
    <url scheme: 'http', host: 'host', path: 'a'>
    >>> url('http://host/a#b#c')
    <url scheme: 'http', host: 'host', path: 'a', fragment: 'b#c'>
    >>> url('http://host/a#b?c')
    <url scheme: 'http', host: 'host', path: 'a', fragment: 'b?c'>
    >>> url('http://host/?a#b')
    <url scheme: 'http', host: 'host', path: '', query: 'a', fragment: 'b'>
    >>> url('http://host/?a#b', parse_query=False)
    <url scheme: 'http', host: 'host', path: '?a', fragment: 'b'>
    >>> url('http://host/?a#b', parse_fragment=False)
    <url scheme: 'http', host: 'host', path: '', query: 'a#b'>
    >>> url('http://host/?a#b', parse_query=False, parse_fragment=False)
    <url scheme: 'http', host: 'host', path: '?a#b'>

    IPv6 addresses:

    >>> url('ldap://[2001:db8::7]/c=GB?objectClass?one')
    <url scheme: 'ldap', host: '[2001:db8::7]', path: 'c=GB',
         query: 'objectClass?one'>
    >>> url('ldap://joe:xxx@[2001:db8::7]:80/c=GB?objectClass?one')
    <url scheme: 'ldap', user: 'joe', passwd: 'xxx', host: '[2001:db8::7]',
         port: '80', path: 'c=GB', query: 'objectClass?one'>

    Missing scheme, host, etc.:

    >>> url('://192.0.2.16:80/')
    <url path: '://192.0.2.16:80/'>
    >>> url('http://mercurial.selenic.com')
    <url scheme: 'http', host: 'mercurial.selenic.com'>
    >>> url('/foo')
    <url path: '/foo'>
    >>> url('bundle:/foo')
    <url scheme: 'bundle', path: '/foo'>
    >>> url('a?b#c')
    <url path: 'a?b', fragment: 'c'>
    >>> url('http://x.com?arg=/foo')
    <url scheme: 'http', host: 'x.com', query: 'arg=/foo'>
    >>> url('http://joe:xxx@/foo')
    <url scheme: 'http', user: 'joe', passwd: 'xxx', path: 'foo'>

    Just a scheme and a path:

    >>> url('mailto:John.Doe@example.com')
    <url scheme: 'mailto', path: 'John.Doe@example.com'>
    >>> url('a:b:c:d')
    <url scheme: 'a', path: 'b:c:d'>

    SSH examples:

    >>> url('ssh://joe@host//home/joe')
    <url scheme: 'ssh', user: 'joe', host: 'host', path: '/home/joe'>
    >>> url('ssh://joe:xxx@host/src')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host', path: 'src'>
    >>> url('ssh://joe:xxx@host')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host'>
    >>> url('ssh://joe@host')
    <url scheme: 'ssh', user: 'joe', host: 'host'>
    >>> url('ssh://host')
    <url scheme: 'ssh', host: 'host'>
    >>> url('ssh://')
    <url scheme: 'ssh'>
    >>> url('ssh:')
    <url scheme: 'ssh'>

    Non-numeric port:

    >>> url('http://example.com:dd')
    <url scheme: 'http', host: 'example.com', port: 'dd'>
    >>> url('ssh://joe:xxx@host:ssh/foo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xxx', host: 'host', port: 'ssh',
         path: 'foo'>

    Bad authentication credentials:

    >>> url('http://joe@joeville:123@4:@host/a?b#c')
    <url scheme: 'http', user: 'joe@joeville', passwd: '123@4:',
         host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://!*#?/@!*#?/:@host/a?b#c')
    <url scheme: 'http', host: '!*', fragment: '?/@!*#?/:@host/a?b#c'>
    >>> url('http://!*#?@!*#?:@host/a?b#c')
    <url scheme: 'http', host: '!*', fragment: '?@!*#?:@host/a?b#c'>
    >>> url('http://!*@:!*@@host/a?b#c')
    <url scheme: 'http', user: '!*@', passwd: '!*@', host: 'host',
         path: 'a', query: 'b', fragment: 'c'>

    File paths:

    >>> url('a/b/c/d.g.f')
    <url path: 'a/b/c/d.g.f'>
    >>> url('/x///z/y/')
    <url path: '/x///z/y/'>

    Empty URL:

    >>> u = url('')
    >>> u
    <url path: ''>
    >>> str(u)
    ''

    Empty path with query string:

    >>> str(url('http://foo/?bar'))
    'http://foo/?bar'

    Invalid path:

    >>> u = url('http://foo/bar')
    >>> u.path = 'bar'
    >>> str(u)
    'http://foo/bar'

    >>> u = url('file:///foo/bar/baz')
    >>> u
    <url scheme: 'file', path: '/foo/bar/baz'>
    >>> str(u)
    'file:/foo/bar/baz'
    """
|
185 | ||
|
186 | doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE) | |
|
187 | ||
|
52 | 188 | # Unicode (IDN) certname isn't supported |
|
53 | 189 | check(_verifycert(cert(u'\u4f8b.jp'), 'example.jp'), |
|
54 | 190 | 'IDN in certificate not supported') |
General Comments 0
You need to be logged in to leave comments.
Login now