##// END OF EJS Templates
pathauditor: drop a redundant call to bytes.lower()...
Martin von Zweigbergk -
r44641:d8442023 default
parent child Browse files
Show More
@@ -1,342 +1,342
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import errno
3 import errno
4 import os
4 import os
5 import posixpath
5 import posixpath
6 import stat
6 import stat
7
7
8 from .i18n import _
8 from .i18n import _
9 from . import (
9 from . import (
10 encoding,
10 encoding,
11 error,
11 error,
12 policy,
12 policy,
13 pycompat,
13 pycompat,
14 util,
14 util,
15 )
15 )
16
16
# Alternative implementations resolved through the policy module. ``rustdirs``
# is compared against None before it is used further down in this file, and
# ``parsers`` is probed there for a ``dirs`` attribute.
rustdirs = policy.importrust('dirstate', 'Dirs')
parsers = policy.importmod('parsers')
19
19
20
20
def _lowerclean(s):
    '''return ``s`` lower-cased and passed through encoding.hfsignoreclean

    The result is what path components are compared against when looking
    for ``.hg`` in its various spellings.
    '''
    return encoding.hfsignoreclean(s.lower())
23
23
24
24
class pathauditor(object):
    '''ensure that a filesystem path contains no banned components.

    The following properties of a path are checked:

    - ends with a directory separator
    - under top-level .hg
    - starts at the root of a windows drive
    - contains ".."

    More checks are also done about the file system state:

    - traverses a symlink (e.g. a/symlink_here/b)
    - inside a nested repository (a callback can be used to approve
      some nested repositories, e.g., subrepositories)

    The file system checks are only done when 'realfs' is set to True (the
    default). They should be disabled when we are auditing paths for
    operations on stored history.

    If 'cached' is set to True, audited paths and sub-directories are cached.
    Be careful to not keep the cache of unmanaged directories for long because
    audited paths may be replaced with symlinks.
    '''

    def __init__(self, root, callback=None, realfs=True, cached=False):
        '''set up an auditor for paths relative to ``root``

        ``callback``, when given, is called with the full path of a
        directory that contains a ``.hg`` directory; a true return value
        approves that nested repository.
        '''
        # case-normalized paths / directory prefixes that already passed
        self.audited = set()
        self.auditeddir = set()
        self.root = root
        self._realfs = realfs
        self._cached = cached
        self.callback = callback
        # only fold case when root exists and its filesystem is reported as
        # case-insensitive; otherwise paths are used as they are
        if os.path.lexists(root) and not util.fscasesensitive(root):
            self.normcase = util.normcase
        else:
            self.normcase = lambda x: x

    def __call__(self, path, mode=None):
        '''Check the relative path.

        path may contain a pattern (e.g. foodir/**.txt)

        Returns None when the path is acceptable and raises error.Abort
        when it is not. ``mode`` is accepted but not used by this method.
        '''

        path = util.localpath(path)
        normpath = self.normcase(path)
        if normpath in self.audited:
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise error.Abort(_(b"path ends in directory separator: %s") % path)
        parts = util.splitpath(path)
        if (
            os.path.splitdrive(path)[0]
            or _lowerclean(parts[0]) in (b'.hg', b'.hg.', b'')
            or pycompat.ospardir in parts
        ):
            raise error.Abort(_(b"path contains illegal component: %s") % path)
        # Windows shortname aliases
        for p in parts:
            if b"~" in p:
                first, last = p.split(b"~", 1)
                if last.isdigit() and first.upper() in (b"HG", b"HG8B6C"):
                    raise error.Abort(
                        _(b"path contains illegal component: %s") % path
                    )
        if b'.hg' in _lowerclean(path):
            # _lowerclean() already lower-cases, no extra .lower() needed
            lparts = [_lowerclean(p) for p in parts]
            for p in b'.hg', b'.hg.':
                # the first component was already rejected above
                if p in lparts[1:]:
                    pos = lparts.index(p)
                    base = os.path.join(*parts[:pos])
                    raise error.Abort(
                        _(b"path '%s' is inside nested repo %r")
                        % (path, pycompat.bytestr(base))
                    )

        normparts = util.splitpath(normpath)
        assert len(parts) == len(normparts)

        # drop the final component: only the leading directories are checked
        parts.pop()
        normparts.pop()
        prefixes = []
        # It's important that we check the path parts starting from the root.
        # This means we won't accidentally traverse a symlink into some other
        # filesystem (which is potentially expensive to access).
        for depth in range(1, len(parts) + 1):
            prefix = pycompat.ossep.join(parts[:depth])
            normprefix = pycompat.ossep.join(normparts[:depth])
            if normprefix in self.auditeddir:
                continue
            if self._realfs:
                self._checkfs(prefix, path)
            prefixes.append(normprefix)

        if self._cached:
            self.audited.add(normpath)
            # only add prefixes to the cache after checking everything: we don't
            # want to add "foo/bar/baz" before checking if there's a "foo/.hg"
            self.auditeddir.update(prefixes)

    def _checkfs(self, prefix, path):
        """raise exception if a file system backed check fails

        ``prefix`` is the leading directory portion of ``path`` being
        examined; it is joined onto ``self.root`` before being lstat()ed.
        """
        curpath = os.path.join(self.root, prefix)
        try:
            st = os.lstat(curpath)
        except OSError as err:
            # EINVAL can be raised as invalid path syntax under win32.
            # They must be ignored for patterns can be checked too.
            if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                raise
        else:
            if stat.S_ISLNK(st.st_mode):
                msg = _(b'path %r traverses symbolic link %r') % (
                    pycompat.bytestr(path),
                    pycompat.bytestr(prefix),
                )
                raise error.Abort(msg)
            elif stat.S_ISDIR(st.st_mode) and os.path.isdir(
                os.path.join(curpath, b'.hg')
            ):
                # a nested repository is only tolerated when the callback
                # exists and approves it
                if not self.callback or not self.callback(curpath):
                    msg = _(b"path '%s' is inside nested repo %r")
                    raise error.Abort(msg % (path, pycompat.bytestr(prefix)))

    def check(self, path):
        '''return True if ``path`` passes the audit, False if auditing it
        raised OSError or error.Abort'''
        try:
            self(path)
            return True
        except (OSError, error.Abort):
            return False
151
151
152
152
def canonpath(root, cwd, myname, auditor=None):
    '''return the canonical path of myname, given cwd and root

    The result is relative to ``root`` and has been passed through
    util.pconvert(); ``b''`` is returned when myname designates root
    itself. The relative name is audited with ``auditor`` (a
    pathauditor for ``root`` is created when none is given). error.Abort
    is raised when myname is not under root.

    >>> def check(root, cwd, myname):
    ...     a = pathauditor(root, realfs=False)
    ...     try:
    ...         return canonpath(root, cwd, myname, a)
    ...     except error.Abort:
    ...         return 'aborted'
    >>> def unixonly(root, cwd, myname, expected='aborted'):
    ...     if pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> def winonly(root, cwd, myname, expected='aborted'):
    ...     if not pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> winonly(b'd:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'repo\\\\filename',
    ...         b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo', b'filename', b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo\\\\subdir', b'filename',
    ...         b'subdir/filename')
    'subdir/filename'
    >>> unixonly(b'/repo', b'/dir', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'repo/filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo', b'filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo/subdir', b'filename', b'subdir/filename')
    'subdir/filename'
    '''
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + pycompat.ossep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)

    # fast path: a plain textual prefix match against root
    if name != rootsep and name.startswith(rootsep):
        name = name[len(rootsep) :]
        auditor(name)
        return util.pconvert(name)
    if name == root:
        return b''

    # Determine whether `name' is in the hierarchy at or beneath `root',
    # by iterating name=dirname(name) until that causes no change (can't
    # check name == '/', because that doesn't work on windows). The list
    # `rel' holds the reversed list of components making up the relative
    # file name we want.
    rel = []
    while True:
        try:
            s = util.samefile(name, root)
        except OSError:
            s = False
        if s:
            if not rel:
                # name was actually the same as root (maybe a symlink)
                return b''
            rel.reverse()
            name = os.path.join(*rel)
            auditor(name)
            return util.pconvert(name)
        parent, basename = util.split(name)
        rel.append(basename)
        if parent == name:
            break
        name = parent

    # A common mistake is to use -R, but specify a file relative to the repo
    # instead of cwd. Detect that case, and provide a hint to the user.
    hint = None
    try:
        if cwd != root:
            # raises error.Abort (swallowed below) when myname does not
            # resolve from root either, in which case no hint is given
            canonpath(root, root, myname, auditor)
            relpath = util.pathto(root, cwd, b'')
            if relpath.endswith(pycompat.ossep):
                relpath = relpath[:-1]
            hint = _(b"consider using '--cwd %s'") % relpath
    except error.Abort:
        pass

    raise error.Abort(
        _(b"%s not under root '%s'") % (myname, root), hint=hint
    )
253
253
254
254
def normasprefix(path):
    '''normalize the specified path as path prefix

    Returned value can be used safely for "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, this expects "path" argument to be already
    normalized by "os.path.normpath", "os.path.realpath", and so on.

    See also issue3033 for detail about need of this function.

    >>> normasprefix(b'/foo/bar').replace(pycompat.ossep, b'/')
    '/foo/bar/'
    >>> normasprefix(b'/').replace(pycompat.ossep, b'/')
    '/'
    '''
    # only the part after the drive decides whether a separator is appended
    _drive, p = os.path.splitdrive(path)
    if len(p) != len(pycompat.ossep):
        return path + pycompat.ossep
    # the non-drive part has the length of one separator: return unchanged
    return path
276
276
277
277
def finddirs(path):
    '''yield the ancestor directories of a b'/'-separated path

    Ancestors are produced from the deepest one up, and b'' is always
    yielded last. For b'a/b/c' this gives b'a/b', b'a' and b''.
    '''
    pos = path.rfind(b'/')
    while pos != -1:
        yield path[:pos]
        # continue with the next separator to the left of this one
        pos = path.rfind(b'/', 0, pos)
    yield b''
284
284
285
285
class dirs(object):
    '''a multiset of directory names from a set of file paths'''

    def __init__(self, map, skip=None):
        '''populate the multiset from the paths in ``map``

        When ``map`` is a dict and ``skip`` is given, entries whose value
        has ``skip`` as its first element are left out. Passing ``skip``
        with any other kind of source raises error.ProgrammingError.
        '''
        # directory name -> counter
        self._dirs = {}
        addpath = self.addpath
        if isinstance(map, dict) and skip is not None:
            for f, s in pycompat.iteritems(map):
                if s[0] != skip:
                    addpath(f)
        elif skip is not None:
            raise error.ProgrammingError(
                b"skip character is only supported with a dict source"
            )
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        '''record the ancestor directories of ``path``

        Walks the ancestors from the deepest one up. The first ancestor
        that is already known gets its counter bumped and the walk stops;
        unknown ancestors are inserted with a counter of 1.
        '''
        dirs = self._dirs
        for base in finddirs(path):
            if base.endswith(b'/'):
                raise ValueError(
                    "found invalid consecutive slashes in path: %r" % base
                )
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        '''undo an addpath() of ``path``

        Walks the ancestors from the deepest one up. The first ancestor
        whose counter is above 1 is decremented and the walk stops;
        ancestors whose counter is 1 are removed.
        '''
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
329
329
330
330
# Rebind ``dirs`` to an alternative implementation when one is available:
# first the one exposed by the parsers module, then the Rust one, which is
# checked last and therefore takes precedence.
if util.safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

if rustdirs is not None:
    dirs = rustdirs
336
336
337
337
# forward two methods from posixpath that do what we need, but we'd
# rather not let our internals know that we're thinking in posix terms
# - instead we'll let them be oblivious.
join = posixpath.join
dirname = posixpath.dirname
General Comments 0
You need to be logged in to leave comments. Login now