##// END OF EJS Templates
pathutil: resurrect comment about path auditing order...
Yuya Nishihara -
r44834:d52e3826 default
parent child Browse files
Show More
@@ -1,339 +1,341 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import errno
3 import errno
4 import os
4 import os
5 import posixpath
5 import posixpath
6 import stat
6 import stat
7
7
8 from .i18n import _
8 from .i18n import _
9 from . import (
9 from . import (
10 encoding,
10 encoding,
11 error,
11 error,
12 policy,
12 policy,
13 pycompat,
13 pycompat,
14 util,
14 util,
15 )
15 )
16
16
# Alternative implementations obtained through the policy module. When
# available they replace the pure-Python ``dirs`` class defined below (see
# the overrides near the end of this module); ``rustdirs`` may be None.
rustdirs = policy.importrust('dirstate', 'Dirs')
parsers = policy.importmod('parsers')
19
19
20
20
def _lowerclean(s):
    '''return ``s`` lowercased and passed through encoding.hfsignoreclean()

    The path auditor uses this to compare path components against ".hg"
    without regard to case.
    '''
    # NOTE(review): hfsignoreclean presumably strips code points that HFS+
    # ignores when comparing file names -- confirm against encoding.py.
    return encoding.hfsignoreclean(s.lower())
23
23
24
24
class pathauditor(object):
    '''ensure that a filesystem path contains no banned components.
    the following properties of a path are checked:

    - ends with a directory separator
    - under top-level .hg
    - starts at the root of a windows drive
    - contains ".."

    More checks are also done about the file system states:
    - traverses a symlink (e.g. a/symlink_here/b)
    - inside a nested repository (a callback can be used to approve
      some nested repositories, e.g., subrepositories)

    The file system checks are only done when 'realfs' is set to True (the
    default). They should be disabled when we are auditing paths for
    operations on stored history.

    If 'cached' is set to True, audited paths and sub-directories are cached.
    Be careful to not keep the cache of unmanaged directories for long because
    audited paths may be replaced with symlinks.
    '''

    def __init__(self, root, callback=None, realfs=True, cached=False):
        '''create an auditor for paths relative to ``root``

        ``callback``, if given, is called with the full path of a directory
        that contains a ".hg" directory; a true return value approves that
        nested repository. ``realfs`` enables the file system backed checks
        and ``cached`` enables remembering of already audited paths.
        '''
        # normalized paths / directory prefixes that already passed the audit
        self.audited = set()
        self.auditeddir = set()
        self.root = root
        self._realfs = realfs
        self._cached = cached
        self.callback = callback
        # Cache keys are case-normalized only when root exists and its file
        # system is reported as case-insensitive.
        if os.path.lexists(root) and not util.fscasesensitive(root):
            self.normcase = util.normcase
        else:
            self.normcase = lambda x: x

    def __call__(self, path, mode=None):
        '''Check the relative path.
        path may contain a pattern (e.g. foodir/**.txt)

        Returns None when the path is acceptable and raises error.Abort
        otherwise. ``mode`` is accepted but not used by this implementation.
        '''

        path = util.localpath(path)
        normpath = self.normcase(path)
        if normpath in self.audited:
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise error.Abort(_(b"path ends in directory separator: %s") % path)
        parts = util.splitpath(path)
        # Reject drive-qualified paths, paths whose first component is
        # ".hg" (or empty, i.e. an absolute path), and any ".." component.
        if (
            os.path.splitdrive(path)[0]
            or _lowerclean(parts[0]) in (b'.hg', b'.hg.', b'')
            or pycompat.ospardir in parts
        ):
            raise error.Abort(_(b"path contains illegal component: %s") % path)
        # Windows shortname aliases
        for p in parts:
            if b"~" in p:
                first, last = p.split(b"~", 1)
                if last.isdigit() and first.upper() in [b"HG", b"HG8B6C"]:
                    raise error.Abort(
                        _(b"path contains illegal component: %s") % path
                    )
        # A ".hg" component below the top level means the path points into
        # a nested repository; report the directory that contains it.
        if b'.hg' in _lowerclean(path):
            lparts = [_lowerclean(p) for p in parts]
            for p in b'.hg', b'.hg.':
                if p in lparts[1:]:
                    pos = lparts.index(p)
                    base = os.path.join(*parts[:pos])
                    raise error.Abort(
                        _(b"path '%s' is inside nested repo %r")
                        % (path, pycompat.bytestr(base))
                    )

        normparts = util.splitpath(normpath)
        assert len(parts) == len(normparts)

        # Only the directory prefixes are checked below, not the final
        # component itself.
        parts.pop()
        normparts.pop()
        # It's important that we check the path parts starting from the root.
        # We don't want to add "foo/bar/baz" to auditeddir before checking if
        # there's a "foo/.hg" directory. This also means we won't accidentally
        # traverse a symlink into some other filesystem (which is potentially
        # expensive to access).
        for i in range(len(parts)):
            prefix = pycompat.ossep.join(parts[: i + 1])
            normprefix = pycompat.ossep.join(normparts[: i + 1])
            if normprefix in self.auditeddir:
                continue
            if self._realfs:
                self._checkfs(prefix, path)
            if self._cached:
                self.auditeddir.add(normprefix)

        if self._cached:
            self.audited.add(normpath)

    def _checkfs(self, prefix, path):
        """raise exception if a file system backed check fails

        ``prefix`` is the leading directory portion of ``path`` to examine,
        relative to self.root. error.Abort is raised when it is a symbolic
        link, or a directory containing ".hg" that the callback (if any)
        does not approve.
        """
        curpath = os.path.join(self.root, prefix)
        try:
            st = os.lstat(curpath)
        except OSError as err:
            # EINVAL can be raised as invalid path syntax under win32.
            # They must be ignored for patterns can be checked too.
            if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                raise
        else:
            if stat.S_ISLNK(st.st_mode):
                msg = _(b'path %r traverses symbolic link %r') % (
                    pycompat.bytestr(path),
                    pycompat.bytestr(prefix),
                )
                raise error.Abort(msg)
            elif stat.S_ISDIR(st.st_mode) and os.path.isdir(
                os.path.join(curpath, b'.hg')
            ):
                if not self.callback or not self.callback(curpath):
                    msg = _(b"path '%s' is inside nested repo %r")
                    raise error.Abort(msg % (path, pycompat.bytestr(prefix)))

    def check(self, path):
        '''return True if ``path`` passes the audit, False otherwise

        OSError and error.Abort raised by the audit are reported as False
        instead of being propagated.
        '''
        try:
            self(path)
            return True
        except (OSError, error.Abort):
            return False
148
150
149
151
def canonpath(root, cwd, myname, auditor=None):
    '''return the canonical path of myname, given cwd and root

    ``myname`` is joined onto ``root`` and ``cwd`` unless it is already
    absolute, then normalized. The result is the path relative to ``root``
    converted with util.pconvert(), or b'' when the name refers to ``root``
    itself. The relative path is passed through ``auditor`` (a pathauditor
    for ``root`` is created when none is given). error.Abort is raised when
    the name is not under ``root``.

    >>> def check(root, cwd, myname):
    ...     a = pathauditor(root, realfs=False)
    ...     try:
    ...         return canonpath(root, cwd, myname, a)
    ...     except error.Abort:
    ...         return 'aborted'
    >>> def unixonly(root, cwd, myname, expected='aborted'):
    ...     if pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> def winonly(root, cwd, myname, expected='aborted'):
    ...     if not pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> winonly(b'd:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'repo\\\\filename',
    ...         b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo', b'filename', b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo\\\\subdir', b'filename',
    ...         b'subdir/filename')
    'subdir/filename'
    >>> unixonly(b'/repo', b'/dir', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'repo/filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo', b'filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo/subdir', b'filename', b'subdir/filename')
    'subdir/filename'
    '''
    # rootsep is root with exactly one trailing separator, so that the
    # startswith() test below cannot match a sibling such as "/repo2".
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + pycompat.ossep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)
    if name != rootsep and name.startswith(rootsep):
        # Fast path: the name is textually below root.
        name = name[len(rootsep) :]
        auditor(name)
        return util.pconvert(name)
    elif name == root:
        return b''
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). The list
        # `rel' holds the reversed list of components making up the relative
        # file name we want.
        rel = []
        while True:
            try:
                s = util.samefile(name, root)
            except OSError:
                s = False
            if s:
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return b''
                rel.reverse()
                name = os.path.join(*rel)
                auditor(name)
                return util.pconvert(name)
            dirname, basename = util.split(name)
            rel.append(basename)
            if dirname == name:
                break
            name = dirname

        # A common mistake is to use -R, but specify a file relative to the repo
        # instead of cwd. Detect that case, and provide a hint to the user.
        hint = None
        try:
            if cwd != root:
                # Raises error.Abort (swallowed below) if myname is not valid
                # relative to root either, in which case no hint is given.
                canonpath(root, root, myname, auditor)
                relpath = util.pathto(root, cwd, b'')
                if relpath.endswith(pycompat.ossep):
                    relpath = relpath[:-1]
                hint = _(b"consider using '--cwd %s'") % relpath
        except error.Abort:
            pass

        raise error.Abort(
            _(b"%s not under root '%s'") % (myname, root), hint=hint
        )
250
252
251
253
def normasprefix(path):
    '''normalize the specified path as path prefix

    Returned value can be used safely for "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, this expects "path" argument to be already
    normalized by "os.path.normpath", "os.path.realpath", and so on.

    See also issue3033 for detail about need of this function.

    >>> normasprefix(b'/foo/bar').replace(pycompat.ossep, b'/')
    '/foo/bar/'
    >>> normasprefix(b'/').replace(pycompat.ossep, b'/')
    '/'
    '''
    d, p = os.path.splitdrive(path)
    # A drive-less part that is as long as one separator is taken to be the
    # root directory, which already ends with a separator.
    if len(p) != len(pycompat.ossep):
        return path + pycompat.ossep
    else:
        return path
273
275
274
276
def finddirs(path):
    '''yield the ancestor directories of ``path``, deepest first

    ``path`` is a bytes path using b'/' as separator. Each yielded value is
    the part of ``path`` before one of its slashes, scanning from the right;
    the empty bytes string b'' is always yielded last. For example,
    b'a/b/c' produces b'a/b', b'a' and b''.
    '''
    pos = path.rfind(b'/')
    while pos != -1:
        yield path[:pos]
        # continue the search strictly to the left of the slash just used
        pos = path.rfind(b'/', 0, pos)
    yield b''
281
283
282
284
class dirs(object):
    '''a multiset of directory names from a set of file paths'''

    def __init__(self, map, skip=None):
        '''collect the directories of every path in ``map``

        ``map`` is an iterable of paths. When it is a dict and ``skip`` is
        given, entries whose value starts with ``skip`` (``value[0] ==
        skip``) are left out. Passing ``skip`` with a non-dict source raises
        error.ProgrammingError.
        '''
        # directory name -> reference count
        self._dirs = {}
        addpath = self.addpath
        if isinstance(map, dict) and skip is not None:
            for f, s in pycompat.iteritems(map):
                if s[0] != skip:
                    addpath(f)
        elif skip is not None:
            raise error.ProgrammingError(
                b"skip character is only supported with a dict source"
            )
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        '''record the ancestor directories of ``path``

        Raises ValueError if the path contains consecutive slashes.
        '''
        dirs = self._dirs
        for base in finddirs(path):
            if base.endswith(b'/'):
                raise ValueError(
                    "found invalid consecutive slashes in path: %r" % base
                )
            if base in dirs:
                # Only the deepest already-known directory is incremented;
                # the walk stops here, so shallower ancestors keep their
                # current counts.
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        '''undo one addpath() of ``path``

        Mirrors addpath(): directories whose count is 1 are removed, and the
        first directory with a higher count is decremented, ending the walk.
        A directory that is not present raises KeyError.
        '''
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
326
328
327
329
# Replace the pure-Python ``dirs`` class with an alternative implementation
# when one is available. The Rust class is checked last and therefore takes
# precedence over the one from the ``parsers`` module.
if util.safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

if rustdirs is not None:
    dirs = rustdirs
333
335
334
336
# forward two methods from posixpath that do what we need, but we'd
# rather not let our internals know that we're thinking in posix terms
# - instead we'll let them be oblivious.
join = posixpath.join
dirname = posixpath.dirname
General Comments 0
You need to be logged in to leave comments. Login now