##// END OF EJS Templates
typing: import unconditionally...
Arseniy Alekseyev -
r50801:15175774 default
parent child Browse files
Show More
@@ -1,380 +1,378 b''
1 import contextlib
1 import contextlib
2 import errno
2 import errno
3 import os
3 import os
4 import posixpath
4 import posixpath
5 import stat
5 import stat
6
6
7 from typing import (
8 Any,
9 Callable,
10 Iterator,
11 Optional,
12 )
13
7 from .i18n import _
14 from .i18n import _
8 from . import (
15 from . import (
9 encoding,
16 encoding,
10 error,
17 error,
11 policy,
18 policy,
12 pycompat,
19 pycompat,
13 util,
20 util,
14 )
21 )
15
22
16 if pycompat.TYPE_CHECKING:
17 from typing import (
18 Any,
19 Callable,
20 Iterator,
21 Optional,
22 )
23
24
25 rustdirs = policy.importrust('dirstate', 'Dirs')
23 rustdirs = policy.importrust('dirstate', 'Dirs')
26 parsers = policy.importmod('parsers')
24 parsers = policy.importmod('parsers')
27
25
28
26
def _lowerclean(s):
    # type: (bytes) -> bytes
    """Return ``s`` lower-cased and passed through ``encoding.hfsignoreclean``.

    Used by the path auditor to compare path components against ``.hg``
    in a normalized form.
    """
    return encoding.hfsignoreclean(s.lower())
32
30
33
31
class pathauditor:
    """ensure that a filesystem path contains no banned components.
    the following properties of a path are checked:

    - ends with a directory separator
    - under top-level .hg
    - starts at the root of a windows drive
    - contains ".."

    More checks are also done about the file system states:
    - traverses a symlink (e.g. a/symlink_here/b)
    - inside a nested repository (a callback can be used to approve
      some nested repositories, e.g., subrepositories)

    The file system checks are only done when 'realfs' is set to True (the
    default). They should be disabled when we are auditing paths for
    operations on stored history.

    If 'cached' is set to True, audited paths and sub-directories are cached.
    Be careful to not keep the cache of unmanaged directories for long because
    audited paths may be replaced with symlinks.
    """

    def __init__(self, root, callback=None, realfs=True, cached=False):
        """Create an auditor for paths relative to ``root``.

        ``callback``, when set, is called with the joined path of a
        directory that contains a ``.hg`` directory; a true return value
        approves that nested repository.
        """
        # paths that passed a full audit (only filled while caching)
        self.audited = set()
        # directory prefix -> result of _checkfs_exists (only while caching)
        self.auditeddir = dict()
        self.root = root
        self._realfs = realfs
        self._cached = cached
        self.callback = callback
        if os.path.lexists(root) and not util.fscasesensitive(root):
            self.normcase = util.normcase
        else:
            self.normcase = lambda x: x

    def __call__(self, path, mode=None):
        # type: (bytes, Optional[Any]) -> None
        """Check the relative path.
        path may contain a pattern (e.g. foodir/**.txt)

        Raises error.InputError for a lexically invalid path, and
        error.Abort (from the file system checks) when 'realfs' is set.
        ``mode`` is accepted but not used by this method.
        """

        path = util.localpath(path)
        if path in self.audited:
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise error.InputError(
                _(b"path ends in directory separator: %s") % path
            )
        parts = util.splitpath(path)
        if (
            os.path.splitdrive(path)[0]
            or _lowerclean(parts[0]) in (b'.hg', b'.hg.', b'')
            or pycompat.ospardir in parts
        ):
            raise error.InputError(
                _(b"path contains illegal component: %s") % path
            )
        # Windows shortname aliases
        if b"~" in path:
            for p in parts:
                if b"~" in p:
                    first, last = p.split(b"~", 1)
                    if last.isdigit() and first.upper() in [b"HG", b"HG8B6C"]:
                        raise error.InputError(
                            _(b"path contains illegal component: %s") % path
                        )
        if b'.hg' in _lowerclean(path):
            lparts = [_lowerclean(p) for p in parts]
            for p in b'.hg', b'.hg.':
                # index 0 was already rejected above, so only look deeper
                if p in lparts[1:]:
                    pos = lparts.index(p)
                    base = os.path.join(*parts[:pos])
                    raise error.InputError(
                        _(b"path '%s' is inside nested repo %r")
                        % (path, pycompat.bytestr(base))
                    )

        if self._realfs:
            # only the leading directories are checked on disk, not the
            # final component
            parts.pop()
            # It's important that we check the path parts starting from the
            # root. We don't want to add "foo/bar/baz" to auditeddir before
            # checking if there's a "foo/.hg" directory. This also means we
            # won't accidentally traverse a symlink into some other
            # filesystem (which is potentially expensive to access).
            for i in range(len(parts)):
                prefix = pycompat.ossep.join(parts[: i + 1])
                if prefix in self.auditeddir:
                    res = self.auditeddir[prefix]
                else:
                    res = self._checkfs_exists(prefix, path)
                    if self._cached:
                        self.auditeddir[prefix] = res
                if not res:
                    # this prefix does not exist, so nothing below it can
                    break

        if self._cached:
            self.audited.add(path)

    def _checkfs_exists(self, prefix: bytes, path: bytes) -> bool:
        """raise exception if a file system backed check fails.

        Return a bool that indicates that the directory (or file) exists.

        ``prefix`` is the leading part of ``path`` being checked; ``path``
        is only used in error messages. Raises error.Abort when ``prefix``
        is a symlink or an unapproved nested repository, and re-raises
        unexpected OSError from ``os.lstat``.
        """
        curpath = os.path.join(self.root, prefix)
        try:
            st = os.lstat(curpath)
        except OSError as err:
            if err.errno == errno.ENOENT:
                return False
            # EINVAL can be raised as invalid path syntax under win32.
            # They must be ignored for patterns can be checked too.
            # (ENOENT was handled just above; ENOTDIR/EINVAL fall through
            # to the final ``return True``.)
            if err.errno not in (errno.ENOTDIR, errno.EINVAL):
                raise
        else:
            if stat.S_ISLNK(st.st_mode):
                msg = _(b'path %r traverses symbolic link %r') % (
                    pycompat.bytestr(path),
                    pycompat.bytestr(prefix),
                )
                raise error.Abort(msg)
            elif stat.S_ISDIR(st.st_mode) and os.path.isdir(
                os.path.join(curpath, b'.hg')
            ):
                if not self.callback or not self.callback(curpath):
                    msg = _(b"path '%s' is inside nested repo %r")
                    raise error.Abort(msg % (path, pycompat.bytestr(prefix)))
        return True

    def check(self, path):
        # type: (bytes) -> bool
        """Return True if auditing ``path`` raises neither OSError nor
        error.Abort, False otherwise."""
        try:
            self(path)
            return True
        except (OSError, error.Abort):
            return False

    @contextlib.contextmanager
    def cached(self):
        """Context manager that turns caching on for the enclosed block.

        If caching is already enabled this does nothing. Otherwise caching
        is enabled on entry, and on exit both caches are cleared and
        caching is disabled again.
        """
        if self._cached:
            yield
        else:
            try:
                self._cached = True
                yield
            finally:
                self.audited.clear()
                self.auditeddir.clear()
                self._cached = False
181
179
182
180
def canonpath(root, cwd, myname, auditor=None):
    # type: (bytes, bytes, bytes, Optional[pathauditor]) -> bytes
    """return the canonical path of myname, given cwd and root

    The result is relative to ``root``, audited with ``auditor`` (a new
    ``pathauditor(root)`` when none is given) and converted with
    ``util.pconvert``. ``b''`` is returned when the name is the root
    itself. error.Abort is raised when the name is not under ``root``.

    >>> def check(root, cwd, myname):
    ...     a = pathauditor(root, realfs=False)
    ...     try:
    ...         return canonpath(root, cwd, myname, a)
    ...     except error.Abort:
    ...         return 'aborted'
    >>> def unixonly(root, cwd, myname, expected='aborted'):
    ...     if pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> def winonly(root, cwd, myname, expected='aborted'):
    ...     if not pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> winonly(b'd:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'repo\\\\filename',
    ...         b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo', b'filename', b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo\\\\subdir', b'filename',
    ...         b'subdir/filename')
    'subdir/filename'
    >>> unixonly(b'/repo', b'/dir', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'repo/filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo', b'filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo/subdir', b'filename', b'subdir/filename')
    'subdir/filename'
    """
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + pycompat.ossep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)
    if name != rootsep and name.startswith(rootsep):
        # fast path: purely lexical match against the root prefix
        name = name[len(rootsep) :]
        auditor(name)
        return util.pconvert(name)
    elif name == root:
        return b''
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). The list
        # `rel' holds the reversed list of components making up the relative
        # file name we want.
        rel = []
        while True:
            try:
                s = util.samefile(name, root)
            except OSError:
                s = False
            if s:
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return b''
                rel.reverse()
                name = os.path.join(*rel)
                auditor(name)
                return util.pconvert(name)
            parent, basename = util.split(name)
            rel.append(basename)
            if parent == name:
                break
            name = parent

        # A common mistake is to use -R, but specify a file relative to the
        # repo instead of cwd. Detect that case, and provide a hint to the
        # user.
        hint = None
        try:
            if cwd != root:
                # only used as a probe: it raises error.Abort when myname
                # is not valid relative to root either
                canonpath(root, root, myname, auditor)
                relpath = util.pathto(root, cwd, b'')
                if relpath.endswith(pycompat.ossep):
                    relpath = relpath[:-1]
                hint = _(b"consider using '--cwd %s'") % relpath
        except error.Abort:
            pass

        raise error.Abort(
            _(b"%s not under root '%s'") % (myname, root), hint=hint
        )
284
282
285
283
def normasprefix(path):
    # type: (bytes) -> bytes
    """normalize the specified path as path prefix

    Returned value can be used safely for "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, this expects "path" argument to be already
    normalized by "os.path.normpath", "os.path.realpath", and so on.

    See also issue3033 for detail about need of this function.

    >>> normasprefix(b'/foo/bar').replace(pycompat.ossep, b'/')
    '/foo/bar/'
    >>> normasprefix(b'/').replace(pycompat.ossep, b'/')
    '/'
    """
    _drive, p = os.path.splitdrive(path)
    # Only the length of the drive-less part is compared with the length
    # of the separator; when they match, ``path`` is returned unchanged.
    if len(p) != len(pycompat.ossep):
        return path + pycompat.ossep
    else:
        return path
308
306
309
307
def finddirs(path):
    # type: (bytes) -> Iterator[bytes]
    """Yield every ancestor directory of ``path``, deepest first.

    ``path`` is split on ``b'/'``. The sequence always ends with ``b''``,
    so a path without any slash yields only ``b''``.
    """
    pos = path.rfind(b'/')
    while pos != -1:
        yield path[:pos]
        # continue the search strictly to the left of the slash just used
        pos = path.rfind(b'/', 0, pos)
    yield b''
317
315
318
316
class dirs:
    '''a multiset of directory names from a set of file paths'''

    def __init__(self, map, only_tracked=False):
        """
        a dict map indicates a dirstate while a list indicates a manifest

        With ``only_tracked`` set, ``map`` must be a dict and entries whose
        ``state`` is ``b'r'`` are skipped; any other source raises
        error.ProgrammingError.
        """
        # directory name -> count
        self._dirs = {}
        addpath = self.addpath
        if isinstance(map, dict) and only_tracked:
            for f, s in map.items():
                if s.state != b'r':
                    addpath(f)
        elif only_tracked:
            msg = b"`only_tracked` is only supported with a dict source"
            raise error.ProgrammingError(msg)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        # type: (bytes) -> None
        """Record the ancestor directories of ``path``.

        Raises ValueError when an ancestor ends with ``b'/'``, i.e. the
        path contains consecutive slashes.
        """
        counts = self._dirs
        for base in finddirs(path):
            if base.endswith(b'/'):
                raise ValueError(
                    "found invalid consecutive slashes in path: %r" % base
                )
            if base in counts:
                # Stop at the first directory that is already known: only
                # its count is bumped, shallower ancestors are not touched.
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        # type: (bytes) -> None
        """Undo one ``addpath(path)``.

        Raises KeyError if an ancestor directory is not present.
        """
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                # mirror of addpath: the first directory with a count
                # above one is decremented and the walk stops there
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        """Iterate over the recorded directory names."""
        return iter(self._dirs)

    def __contains__(self, d):
        # type: (bytes) -> bool
        """Return True if directory ``d`` is recorded."""
        return d in self._dirs
367
365
368
366
# Replace the pure-Python ``dirs`` class with the implementation exported
# by the ``parsers`` module (loaded through ``policy.importmod``), when
# that module provides one.
if util.safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

# The Rust ``Dirs`` (loaded through ``policy.importrust``) is assigned
# last, so it takes precedence whenever it is not None.
if rustdirs is not None:
    dirs = rustdirs
374
372
375
373
# forward two methods from posixpath that do what we need, but we'd
# rather not let our internals know that we're thinking in posix terms
# - instead we'll let them be oblivious.
join = posixpath.join
dirname = posixpath.dirname  # type: Callable[[bytes], bytes]
General Comments 0
You need to be logged in to leave comments. Login now