##// END OF EJS Templates
pathauditor: no need to normcase the paths...
Arseniy Alekseyev -
r50780:445b4d81 default
parent child Browse files
Show More
@@ -1,379 +1,373 b''
1 import contextlib
1 import contextlib
2 import errno
2 import errno
3 import os
3 import os
4 import posixpath
4 import posixpath
5 import stat
5 import stat
6
6
7 from .i18n import _
7 from .i18n import _
8 from . import (
8 from . import (
9 encoding,
9 encoding,
10 error,
10 error,
11 policy,
11 policy,
12 pycompat,
12 pycompat,
13 util,
13 util,
14 )
14 )
15
15
16 if pycompat.TYPE_CHECKING:
16 if pycompat.TYPE_CHECKING:
17 from typing import (
17 from typing import (
18 Any,
18 Any,
19 Callable,
19 Callable,
20 Iterator,
20 Iterator,
21 Optional,
21 Optional,
22 )
22 )
23
23
24
24
25 rustdirs = policy.importrust('dirstate', 'Dirs')
25 rustdirs = policy.importrust('dirstate', 'Dirs')
26 parsers = policy.importmod('parsers')
26 parsers = policy.importmod('parsers')
27
27
28
28
29 def _lowerclean(s):
29 def _lowerclean(s):
30 # type: (bytes) -> bytes
30 # type: (bytes) -> bytes
31 return encoding.hfsignoreclean(s.lower())
31 return encoding.hfsignoreclean(s.lower())
32
32
33
33
34 class pathauditor:
34 class pathauditor:
35 """ensure that a filesystem path contains no banned components.
35 """ensure that a filesystem path contains no banned components.
36 the following properties of a path are checked:
36 the following properties of a path are checked:
37
37
38 - ends with a directory separator
38 - ends with a directory separator
39 - under top-level .hg
39 - under top-level .hg
40 - starts at the root of a windows drive
40 - starts at the root of a windows drive
41 - contains ".."
41 - contains ".."
42
42
43 More checks are also done about the file system state:
43 More checks are also done about the file system state:
44 - traverses a symlink (e.g. a/symlink_here/b)
44 - traverses a symlink (e.g. a/symlink_here/b)
45 - inside a nested repository (a callback can be used to approve
45 - inside a nested repository (a callback can be used to approve
46 some nested repositories, e.g., subrepositories)
46 some nested repositories, e.g., subrepositories)
47
47
48 The file system checks are only done when 'realfs' is set to True (the
48 The file system checks are only done when 'realfs' is set to True (the
49 default). They should be disabled when we are auditing paths for operations on
49 default). They should be disabled when we are auditing paths for operations on
50 stored history.
50 stored history.
51
51
52 If 'cached' is set to True, audited paths and sub-directories are cached.
52 If 'cached' is set to True, audited paths and sub-directories are cached.
53 Be careful to not keep the cache of unmanaged directories for long because
53 Be careful to not keep the cache of unmanaged directories for long because
54 audited paths may be replaced with symlinks.
54 audited paths may be replaced with symlinks.
55 """
55 """
56
56
57 def __init__(self, root, callback=None, realfs=True, cached=False):
57 def __init__(self, root, callback=None, realfs=True, cached=False):
58 self.audited = set()
58 self.audited = set()
59 self.auditeddir = set()
59 self.auditeddir = set()
60 self.root = root
60 self.root = root
61 self._realfs = realfs
61 self._realfs = realfs
62 self._cached = cached
62 self._cached = cached
63 self.callback = callback
63 self.callback = callback
64 if os.path.lexists(root) and not util.fscasesensitive(root):
64 if os.path.lexists(root) and not util.fscasesensitive(root):
65 self.normcase = util.normcase
65 self.normcase = util.normcase
66 else:
66 else:
67 self.normcase = lambda x: x
67 self.normcase = lambda x: x
68
68
69 def __call__(self, path, mode=None):
69 def __call__(self, path, mode=None):
70 # type: (bytes, Optional[Any]) -> None
70 # type: (bytes, Optional[Any]) -> None
71 """Check the relative path.
71 """Check the relative path.
72 path may contain a pattern (e.g. foodir/**.txt)"""
72 path may contain a pattern (e.g. foodir/**.txt)"""
73
73
74 path = util.localpath(path)
74 path = util.localpath(path)
75 normpath = self.normcase(path)
75 if path in self.audited:
76 if normpath in self.audited:
77 return
76 return
78 # AIX ignores "/" at end of path, others raise EISDIR.
77 # AIX ignores "/" at end of path, others raise EISDIR.
79 if util.endswithsep(path):
78 if util.endswithsep(path):
80 raise error.InputError(
79 raise error.InputError(
81 _(b"path ends in directory separator: %s") % path
80 _(b"path ends in directory separator: %s") % path
82 )
81 )
83 parts = util.splitpath(path)
82 parts = util.splitpath(path)
84 if (
83 if (
85 os.path.splitdrive(path)[0]
84 os.path.splitdrive(path)[0]
86 or _lowerclean(parts[0]) in (b'.hg', b'.hg.', b'')
85 or _lowerclean(parts[0]) in (b'.hg', b'.hg.', b'')
87 or pycompat.ospardir in parts
86 or pycompat.ospardir in parts
88 ):
87 ):
89 raise error.InputError(
88 raise error.InputError(
90 _(b"path contains illegal component: %s") % path
89 _(b"path contains illegal component: %s") % path
91 )
90 )
92 # Windows shortname aliases
91 # Windows shortname aliases
93 if b"~" in path:
92 if b"~" in path:
94 for p in parts:
93 for p in parts:
95 if b"~" in p:
94 if b"~" in p:
96 first, last = p.split(b"~", 1)
95 first, last = p.split(b"~", 1)
97 if last.isdigit() and first.upper() in [b"HG", b"HG8B6C"]:
96 if last.isdigit() and first.upper() in [b"HG", b"HG8B6C"]:
98 raise error.InputError(
97 raise error.InputError(
99 _(b"path contains illegal component: %s") % path
98 _(b"path contains illegal component: %s") % path
100 )
99 )
101 if b'.hg' in _lowerclean(path):
100 if b'.hg' in _lowerclean(path):
102 lparts = [_lowerclean(p) for p in parts]
101 lparts = [_lowerclean(p) for p in parts]
103 for p in b'.hg', b'.hg.':
102 for p in b'.hg', b'.hg.':
104 if p in lparts[1:]:
103 if p in lparts[1:]:
105 pos = lparts.index(p)
104 pos = lparts.index(p)
106 base = os.path.join(*parts[:pos])
105 base = os.path.join(*parts[:pos])
107 raise error.InputError(
106 raise error.InputError(
108 _(b"path '%s' is inside nested repo %r")
107 _(b"path '%s' is inside nested repo %r")
109 % (path, pycompat.bytestr(base))
108 % (path, pycompat.bytestr(base))
110 )
109 )
111
110
112 normparts = util.splitpath(normpath)
113 assert len(parts) == len(normparts)
114
115 parts.pop()
111 parts.pop()
116 normparts.pop()
117 # It's important that we check the path parts starting from the root.
112 # It's important that we check the path parts starting from the root.
118 # We don't want to add "foo/bar/baz" to auditeddir before checking if
113 # We don't want to add "foo/bar/baz" to auditeddir before checking if
119 # there's a "foo/.hg" directory. This also means we won't accidentally
114 # there's a "foo/.hg" directory. This also means we won't accidentally
120 # traverse a symlink into some other filesystem (which is potentially
115 # traverse a symlink into some other filesystem (which is potentially
121 # expensive to access).
116 # expensive to access).
122 for i in range(len(parts)):
117 for i in range(len(parts)):
123 prefix = pycompat.ossep.join(parts[: i + 1])
118 prefix = pycompat.ossep.join(parts[: i + 1])
124 normprefix = pycompat.ossep.join(normparts[: i + 1])
119 if prefix in self.auditeddir:
125 if normprefix in self.auditeddir:
126 continue
120 continue
127 if self._realfs:
121 if self._realfs:
128 self._checkfs(prefix, path)
122 self._checkfs(prefix, path)
129 if self._cached:
123 if self._cached:
130 self.auditeddir.add(normprefix)
124 self.auditeddir.add(prefix)
131
125
132 if self._cached:
126 if self._cached:
133 self.audited.add(normpath)
127 self.audited.add(path)
134
128
135 def _checkfs(self, prefix, path):
129 def _checkfs(self, prefix, path):
136 # type: (bytes, bytes) -> None
130 # type: (bytes, bytes) -> None
137 """raise exception if a file system backed check fails"""
131 """raise exception if a file system backed check fails"""
138 curpath = os.path.join(self.root, prefix)
132 curpath = os.path.join(self.root, prefix)
139 try:
133 try:
140 st = os.lstat(curpath)
134 st = os.lstat(curpath)
141 except OSError as err:
135 except OSError as err:
142 # EINVAL can be raised as invalid path syntax under win32.
136 # EINVAL can be raised as invalid path syntax under win32.
143 # Such errors must be ignored so that patterns can be checked too.
137 # Such errors must be ignored so that patterns can be checked too.
144 if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
138 if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
145 raise
139 raise
146 else:
140 else:
147 if stat.S_ISLNK(st.st_mode):
141 if stat.S_ISLNK(st.st_mode):
148 msg = _(b'path %r traverses symbolic link %r') % (
142 msg = _(b'path %r traverses symbolic link %r') % (
149 pycompat.bytestr(path),
143 pycompat.bytestr(path),
150 pycompat.bytestr(prefix),
144 pycompat.bytestr(prefix),
151 )
145 )
152 raise error.Abort(msg)
146 raise error.Abort(msg)
153 elif stat.S_ISDIR(st.st_mode) and os.path.isdir(
147 elif stat.S_ISDIR(st.st_mode) and os.path.isdir(
154 os.path.join(curpath, b'.hg')
148 os.path.join(curpath, b'.hg')
155 ):
149 ):
156 if not self.callback or not self.callback(curpath):
150 if not self.callback or not self.callback(curpath):
157 msg = _(b"path '%s' is inside nested repo %r")
151 msg = _(b"path '%s' is inside nested repo %r")
158 raise error.Abort(msg % (path, pycompat.bytestr(prefix)))
152 raise error.Abort(msg % (path, pycompat.bytestr(prefix)))
159
153
160 def check(self, path):
154 def check(self, path):
161 # type: (bytes) -> bool
155 # type: (bytes) -> bool
162 try:
156 try:
163 self(path)
157 self(path)
164 return True
158 return True
165 except (OSError, error.Abort):
159 except (OSError, error.Abort):
166 return False
160 return False
167
161
168 @contextlib.contextmanager
162 @contextlib.contextmanager
169 def cached(self):
163 def cached(self):
170 if self._cached:
164 if self._cached:
171 yield
165 yield
172 else:
166 else:
173 try:
167 try:
174 self._cached = True
168 self._cached = True
175 yield
169 yield
176 finally:
170 finally:
177 self.audited.clear()
171 self.audited.clear()
178 self.auditeddir.clear()
172 self.auditeddir.clear()
179 self._cached = False
173 self._cached = False
180
174
181
175
182 def canonpath(root, cwd, myname, auditor=None):
176 def canonpath(root, cwd, myname, auditor=None):
183 # type: (bytes, bytes, bytes, Optional[pathauditor]) -> bytes
177 # type: (bytes, bytes, bytes, Optional[pathauditor]) -> bytes
184 """return the canonical path of myname, given cwd and root
178 """return the canonical path of myname, given cwd and root
185
179
186 >>> def check(root, cwd, myname):
180 >>> def check(root, cwd, myname):
187 ... a = pathauditor(root, realfs=False)
181 ... a = pathauditor(root, realfs=False)
188 ... try:
182 ... try:
189 ... return canonpath(root, cwd, myname, a)
183 ... return canonpath(root, cwd, myname, a)
190 ... except error.Abort:
184 ... except error.Abort:
191 ... return 'aborted'
185 ... return 'aborted'
192 >>> def unixonly(root, cwd, myname, expected='aborted'):
186 >>> def unixonly(root, cwd, myname, expected='aborted'):
193 ... if pycompat.iswindows:
187 ... if pycompat.iswindows:
194 ... return expected
188 ... return expected
195 ... return check(root, cwd, myname)
189 ... return check(root, cwd, myname)
196 >>> def winonly(root, cwd, myname, expected='aborted'):
190 >>> def winonly(root, cwd, myname, expected='aborted'):
197 ... if not pycompat.iswindows:
191 ... if not pycompat.iswindows:
198 ... return expected
192 ... return expected
199 ... return check(root, cwd, myname)
193 ... return check(root, cwd, myname)
200 >>> winonly(b'd:\\\\repo', b'c:\\\\dir', b'filename')
194 >>> winonly(b'd:\\\\repo', b'c:\\\\dir', b'filename')
201 'aborted'
195 'aborted'
202 >>> winonly(b'c:\\\\repo', b'c:\\\\dir', b'filename')
196 >>> winonly(b'c:\\\\repo', b'c:\\\\dir', b'filename')
203 'aborted'
197 'aborted'
204 >>> winonly(b'c:\\\\repo', b'c:\\\\', b'filename')
198 >>> winonly(b'c:\\\\repo', b'c:\\\\', b'filename')
205 'aborted'
199 'aborted'
206 >>> winonly(b'c:\\\\repo', b'c:\\\\', b'repo\\\\filename',
200 >>> winonly(b'c:\\\\repo', b'c:\\\\', b'repo\\\\filename',
207 ... b'filename')
201 ... b'filename')
208 'filename'
202 'filename'
209 >>> winonly(b'c:\\\\repo', b'c:\\\\repo', b'filename', b'filename')
203 >>> winonly(b'c:\\\\repo', b'c:\\\\repo', b'filename', b'filename')
210 'filename'
204 'filename'
211 >>> winonly(b'c:\\\\repo', b'c:\\\\repo\\\\subdir', b'filename',
205 >>> winonly(b'c:\\\\repo', b'c:\\\\repo\\\\subdir', b'filename',
212 ... b'subdir/filename')
206 ... b'subdir/filename')
213 'subdir/filename'
207 'subdir/filename'
214 >>> unixonly(b'/repo', b'/dir', b'filename')
208 >>> unixonly(b'/repo', b'/dir', b'filename')
215 'aborted'
209 'aborted'
216 >>> unixonly(b'/repo', b'/', b'filename')
210 >>> unixonly(b'/repo', b'/', b'filename')
217 'aborted'
211 'aborted'
218 >>> unixonly(b'/repo', b'/', b'repo/filename', b'filename')
212 >>> unixonly(b'/repo', b'/', b'repo/filename', b'filename')
219 'filename'
213 'filename'
220 >>> unixonly(b'/repo', b'/repo', b'filename', b'filename')
214 >>> unixonly(b'/repo', b'/repo', b'filename', b'filename')
221 'filename'
215 'filename'
222 >>> unixonly(b'/repo', b'/repo/subdir', b'filename', b'subdir/filename')
216 >>> unixonly(b'/repo', b'/repo/subdir', b'filename', b'subdir/filename')
223 'subdir/filename'
217 'subdir/filename'
224 """
218 """
225 if util.endswithsep(root):
219 if util.endswithsep(root):
226 rootsep = root
220 rootsep = root
227 else:
221 else:
228 rootsep = root + pycompat.ossep
222 rootsep = root + pycompat.ossep
229 name = myname
223 name = myname
230 if not os.path.isabs(name):
224 if not os.path.isabs(name):
231 name = os.path.join(root, cwd, name)
225 name = os.path.join(root, cwd, name)
232 name = os.path.normpath(name)
226 name = os.path.normpath(name)
233 if auditor is None:
227 if auditor is None:
234 auditor = pathauditor(root)
228 auditor = pathauditor(root)
235 if name != rootsep and name.startswith(rootsep):
229 if name != rootsep and name.startswith(rootsep):
236 name = name[len(rootsep) :]
230 name = name[len(rootsep) :]
237 auditor(name)
231 auditor(name)
238 return util.pconvert(name)
232 return util.pconvert(name)
239 elif name == root:
233 elif name == root:
240 return b''
234 return b''
241 else:
235 else:
242 # Determine whether `name' is in the hierarchy at or beneath `root',
236 # Determine whether `name' is in the hierarchy at or beneath `root',
243 # by iterating name=dirname(name) until that causes no change (can't
237 # by iterating name=dirname(name) until that causes no change (can't
244 # check name == '/', because that doesn't work on windows). The list
238 # check name == '/', because that doesn't work on windows). The list
245 # `rel' holds the reversed list of components making up the relative
239 # `rel' holds the reversed list of components making up the relative
246 # file name we want.
240 # file name we want.
247 rel = []
241 rel = []
248 while True:
242 while True:
249 try:
243 try:
250 s = util.samefile(name, root)
244 s = util.samefile(name, root)
251 except OSError:
245 except OSError:
252 s = False
246 s = False
253 if s:
247 if s:
254 if not rel:
248 if not rel:
255 # name was actually the same as root (maybe a symlink)
249 # name was actually the same as root (maybe a symlink)
256 return b''
250 return b''
257 rel.reverse()
251 rel.reverse()
258 name = os.path.join(*rel)
252 name = os.path.join(*rel)
259 auditor(name)
253 auditor(name)
260 return util.pconvert(name)
254 return util.pconvert(name)
261 dirname, basename = util.split(name)
255 dirname, basename = util.split(name)
262 rel.append(basename)
256 rel.append(basename)
263 if dirname == name:
257 if dirname == name:
264 break
258 break
265 name = dirname
259 name = dirname
266
260
267 # A common mistake is to use -R, but specify a file relative to the repo
261 # A common mistake is to use -R, but specify a file relative to the repo
268 # instead of cwd. Detect that case, and provide a hint to the user.
262 # instead of cwd. Detect that case, and provide a hint to the user.
269 hint = None
263 hint = None
270 try:
264 try:
271 if cwd != root:
265 if cwd != root:
272 canonpath(root, root, myname, auditor)
266 canonpath(root, root, myname, auditor)
273 relpath = util.pathto(root, cwd, b'')
267 relpath = util.pathto(root, cwd, b'')
274 if relpath.endswith(pycompat.ossep):
268 if relpath.endswith(pycompat.ossep):
275 relpath = relpath[:-1]
269 relpath = relpath[:-1]
276 hint = _(b"consider using '--cwd %s'") % relpath
270 hint = _(b"consider using '--cwd %s'") % relpath
277 except error.Abort:
271 except error.Abort:
278 pass
272 pass
279
273
280 raise error.Abort(
274 raise error.Abort(
281 _(b"%s not under root '%s'") % (myname, root), hint=hint
275 _(b"%s not under root '%s'") % (myname, root), hint=hint
282 )
276 )
283
277
284
278
285 def normasprefix(path):
279 def normasprefix(path):
286 # type: (bytes) -> bytes
280 # type: (bytes) -> bytes
287 """normalize the specified path as path prefix
281 """normalize the specified path as path prefix
288
282
289 Returned value can be used safely for "p.startswith(prefix)",
283 Returned value can be used safely for "p.startswith(prefix)",
290 "p[len(prefix):]", and so on.
284 "p[len(prefix):]", and so on.
291
285
292 For efficiency, this expects "path" argument to be already
286 For efficiency, this expects "path" argument to be already
293 normalized by "os.path.normpath", "os.path.realpath", and so on.
287 normalized by "os.path.normpath", "os.path.realpath", and so on.
294
288
295 See also issue3033 for detail about need of this function.
289 See also issue3033 for detail about need of this function.
296
290
297 >>> normasprefix(b'/foo/bar').replace(pycompat.ossep, b'/')
291 >>> normasprefix(b'/foo/bar').replace(pycompat.ossep, b'/')
298 '/foo/bar/'
292 '/foo/bar/'
299 >>> normasprefix(b'/').replace(pycompat.ossep, b'/')
293 >>> normasprefix(b'/').replace(pycompat.ossep, b'/')
300 '/'
294 '/'
301 """
295 """
302 d, p = os.path.splitdrive(path)
296 d, p = os.path.splitdrive(path)
303 if len(p) != len(pycompat.ossep):
297 if len(p) != len(pycompat.ossep):
304 return path + pycompat.ossep
298 return path + pycompat.ossep
305 else:
299 else:
306 return path
300 return path
307
301
308
302
309 def finddirs(path):
303 def finddirs(path):
310 # type: (bytes) -> Iterator[bytes]
304 # type: (bytes) -> Iterator[bytes]
311 pos = path.rfind(b'/')
305 pos = path.rfind(b'/')
312 while pos != -1:
306 while pos != -1:
313 yield path[:pos]
307 yield path[:pos]
314 pos = path.rfind(b'/', 0, pos)
308 pos = path.rfind(b'/', 0, pos)
315 yield b''
309 yield b''
316
310
317
311
318 class dirs:
312 class dirs:
319 '''a multiset of directory names from a set of file paths'''
313 '''a multiset of directory names from a set of file paths'''
320
314
321 def __init__(self, map, only_tracked=False):
315 def __init__(self, map, only_tracked=False):
322 """
316 """
323 a dict map indicates a dirstate while a list indicates a manifest
317 a dict map indicates a dirstate while a list indicates a manifest
324 """
318 """
325 self._dirs = {}
319 self._dirs = {}
326 addpath = self.addpath
320 addpath = self.addpath
327 if isinstance(map, dict) and only_tracked:
321 if isinstance(map, dict) and only_tracked:
328 for f, s in map.items():
322 for f, s in map.items():
329 if s.state != b'r':
323 if s.state != b'r':
330 addpath(f)
324 addpath(f)
331 elif only_tracked:
325 elif only_tracked:
332 msg = b"`only_tracked` is only supported with a dict source"
326 msg = b"`only_tracked` is only supported with a dict source"
333 raise error.ProgrammingError(msg)
327 raise error.ProgrammingError(msg)
334 else:
328 else:
335 for f in map:
329 for f in map:
336 addpath(f)
330 addpath(f)
337
331
338 def addpath(self, path):
332 def addpath(self, path):
339 # type: (bytes) -> None
333 # type: (bytes) -> None
340 dirs = self._dirs
334 dirs = self._dirs
341 for base in finddirs(path):
335 for base in finddirs(path):
342 if base.endswith(b'/'):
336 if base.endswith(b'/'):
343 raise ValueError(
337 raise ValueError(
344 "found invalid consecutive slashes in path: %r" % base
338 "found invalid consecutive slashes in path: %r" % base
345 )
339 )
346 if base in dirs:
340 if base in dirs:
347 dirs[base] += 1
341 dirs[base] += 1
348 return
342 return
349 dirs[base] = 1
343 dirs[base] = 1
350
344
351 def delpath(self, path):
345 def delpath(self, path):
352 # type: (bytes) -> None
346 # type: (bytes) -> None
353 dirs = self._dirs
347 dirs = self._dirs
354 for base in finddirs(path):
348 for base in finddirs(path):
355 if dirs[base] > 1:
349 if dirs[base] > 1:
356 dirs[base] -= 1
350 dirs[base] -= 1
357 return
351 return
358 del dirs[base]
352 del dirs[base]
359
353
360 def __iter__(self):
354 def __iter__(self):
361 return iter(self._dirs)
355 return iter(self._dirs)
362
356
363 def __contains__(self, d):
357 def __contains__(self, d):
364 # type: (bytes) -> bool
358 # type: (bytes) -> bool
365 return d in self._dirs
359 return d in self._dirs
366
360
367
361
368 if util.safehasattr(parsers, 'dirs'):
362 if util.safehasattr(parsers, 'dirs'):
369 dirs = parsers.dirs
363 dirs = parsers.dirs
370
364
371 if rustdirs is not None:
365 if rustdirs is not None:
372 dirs = rustdirs
366 dirs = rustdirs
373
367
374
368
375 # forward two methods from posixpath that do what we need, but we'd
369 # forward two methods from posixpath that do what we need, but we'd
376 # rather not let our internals know that we're thinking in posix terms
370 # rather not let our internals know that we're thinking in posix terms
377 # - instead we'll let them be oblivious.
371 # - instead we'll let them be oblivious.
378 join = posixpath.join
372 join = posixpath.join
379 dirname = posixpath.dirname # type: Callable[[bytes], bytes]
373 dirname = posixpath.dirname # type: Callable[[bytes], bytes]
General Comments 0
You need to be logged in to leave comments. Login now