##// END OF EJS Templates
pathauditor: move file system specific check in their own function...
Pierre-Yves David -
r27231:6d29ce25 default
parent child Browse files
Show More
@@ -1,203 +1,206 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import errno
3 import errno
4 import os
4 import os
5 import posixpath
5 import posixpath
6 import stat
6 import stat
7
7
8 from .i18n import _
8 from .i18n import _
9 from . import (
9 from . import (
10 encoding,
10 encoding,
11 error,
11 error,
12 util,
12 util,
13 )
13 )
14
14
def _lowerclean(s):
    '''return s lowercased and then passed through encoding.hfsignoreclean

    Used to compare path components against '.hg' regardless of case.
    '''
    # NOTE(review): hfsignoreclean presumably strips the characters that
    # HFS+ ignores in file names -- confirm against the encoding module.
    return encoding.hfsignoreclean(s.lower())
17
17
class pathauditor(object):
    '''ensure that a filesystem path contains no banned components.
    the following properties of a path are checked:

    - ends with a directory separator
    - under top-level .hg
    - starts at the root of a windows drive
    - contains ".."
    - traverses a symlink (e.g. a/symlink_here/b)
    - inside a nested repository (a callback can be used to approve
      some nested repositories, e.g., subrepositories)

    Paths that pass the audit, and the directory prefixes that were
    checked on the way, are cached (keyed by their self.normcase() form)
    so that auditing them again is cheap.
    '''

    def __init__(self, root, callback=None):
        '''create an auditor for paths relative to root

        root is the directory that audited paths are joined onto for the
        file system checks.

        callback, when given, is called with the full path of a directory
        that contains a '.hg' directory; a true return value approves
        that nested repository.
        '''
        self.audited = set()      # normcased paths that passed the audit
        self.auditeddir = set()   # normcased directory prefixes checked
        self.root = root
        self.callback = callback
        # Only fold case for the caches when root exists and
        # util.checkcase() reports false for it.
        # NOTE(review): presumably that means a case-insensitive file
        # system -- confirm against util.checkcase.
        if os.path.lexists(root) and not util.checkcase(root):
            self.normcase = util.normcase
        else:
            self.normcase = lambda x: x

    def __call__(self, path):
        '''Check the relative path.
        path may contain a pattern (e.g. foodir/**.txt)

        Returns None when the path is acceptable (or was already audited).
        Raises error.Abort when one of the checks listed in the class
        docstring fails. An OSError from the file system checks is
        propagated unless its errno is ENOENT, ENOTDIR or EINVAL.
        '''

        path = util.localpath(path)
        normpath = self.normcase(path)
        if normpath in self.audited:
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise error.Abort(_("path ends in directory separator: %s") % path)
        parts = util.splitpath(path)
        if (os.path.splitdrive(path)[0]
            or _lowerclean(parts[0]) in ('.hg', '.hg.', '')
            or os.pardir in parts):
            raise error.Abort(_("path contains illegal component: %s") % path)
        # Windows shortname aliases
        for p in parts:
            if "~" in p:
                first, last = p.split("~", 1)
                if last.isdigit() and first.upper() in ["HG", "HG8B6C"]:
                    raise error.Abort(_("path contains illegal component: %s")
                                      % path)
        if '.hg' in _lowerclean(path):
            # _lowerclean() already lowercases, no extra lower() needed
            lparts = [_lowerclean(p) for p in parts]
            for p in '.hg', '.hg.':
                if p in lparts[1:]:
                    # the leading component was rejected above, so the
                    # first match is never at index 0
                    pos = lparts.index(p)
                    base = os.path.join(*parts[:pos])
                    raise error.Abort(_("path '%s' is inside nested repo %r")
                                      % (path, base))

        normparts = util.splitpath(normpath)
        assert len(parts) == len(normparts)

        # Walk the directory prefixes from the deepest one up to the
        # first component, stopping early at an already audited prefix.
        parts.pop()
        normparts.pop()
        prefixes = []
        while parts:
            prefix = os.sep.join(parts)
            normprefix = os.sep.join(normparts)
            if normprefix in self.auditeddir:
                break
            self._checkfs(prefix, path)
            prefixes.append(normprefix)
            parts.pop()
            normparts.pop()

        self.audited.add(normpath)
        # only add prefixes to the cache after checking everything: we don't
        # want to add "foo/bar/baz" before checking if there's a "foo/.hg"
        self.auditeddir.update(prefixes)

    def _checkfs(self, prefix, path):
        """raise exception if a file system backed check fails

        prefix is a directory prefix of path, relative to self.root; path
        is only used in the error messages.

        Raises error.Abort when prefix is a symbolic link, or when it is
        a directory containing a '.hg' directory that self.callback does
        not approve. Returns None otherwise, including when prefix does
        not exist.
        """
        curpath = os.path.join(self.root, prefix)
        try:
            st = os.lstat(curpath)
        except OSError as err:
            # EINVAL can be raised as invalid path syntax under win32.
            # They must be ignored for patterns can be checked too.
            if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                raise
        else:
            if stat.S_ISLNK(st.st_mode):
                raise error.Abort(
                    _('path %r traverses symbolic link %r')
                    % (path, prefix))
            elif (stat.S_ISDIR(st.st_mode) and
                  os.path.isdir(os.path.join(curpath, '.hg'))):
                if not self.callback or not self.callback(curpath):
                    raise error.Abort(_("path '%s' is inside nested "
                                        "repo %r") % (path, prefix))

    def check(self, path):
        '''return True if path passes the audit, False otherwise

        Same checks as calling the auditor directly, but an OSError or
        error.Abort is reported as False instead of being raised.
        '''
        try:
            self(path)
            return True
        except (OSError, error.Abort):
            return False
118
121
def canonpath(root, cwd, myname, auditor=None):
    '''return the canonical path of myname, given cwd and root

    myname is taken as relative to cwd (itself joined onto root) unless
    it is absolute. The result is the path relative to root, audited and
    passed through util.pconvert(); it is '' when myname names root
    itself.

    auditor is called on the relative path before it is returned; when
    omitted a new pathauditor(root) is used.

    Raises error.Abort when myname is not under root (with a '--cwd'
    hint when myname would resolve relative to root), and propagates
    whatever the auditor raises.
    '''
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + os.sep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)
    if name != rootsep and name.startswith(rootsep):
        name = name[len(rootsep):]
        auditor(name)
        return util.pconvert(name)
    elif name == root:
        return ''
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). The list
        # `rel' holds the reversed list of components making up the relative
        # file name we want.
        rel = []
        while True:
            try:
                s = util.samefile(name, root)
            except OSError:
                s = False
            if s:
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return ''
                rel.reverse()
                name = os.path.join(*rel)
                auditor(name)
                return util.pconvert(name)
            # "parent" rather than "dirname" so the module-level dirname
            # alias is not shadowed inside this function
            parent, basename = util.split(name)
            rel.append(basename)
            if parent == name:
                break
            name = parent

        # A common mistake is to use -R, but specify a file relative to the repo
        # instead of cwd. Detect that case, and provide a hint to the user.
        hint = None
        try:
            if cwd != root:
                canonpath(root, root, myname, auditor)
                hint = (_("consider using '--cwd %s'")
                        % os.path.relpath(root, cwd))
        except error.Abort:
            pass

        raise error.Abort(_("%s not under root '%s'") % (myname, root),
                          hint=hint)
176
179
def normasprefix(path):
    '''normalize the specified path as path prefix

    Returned value can be used safely for "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, this expects "path" argument to be already
    normalized by "os.path.normpath", "os.path.realpath", and so on.

    See also issue3033 for detail about need of this function.

    >>> normasprefix('/foo/bar').replace(os.sep, '/')
    '/foo/bar/'
    >>> normasprefix('/').replace(os.sep, '/')
    '/'
    >>> normasprefix('a').replace(os.sep, '/')
    'a/'
    '''
    d, p = os.path.splitdrive(path)
    # Only a path whose non-drive part is exactly one separator (the root
    # of the file system or of a drive) already ends with a separator.
    # Comparing lengths alone would also match one-character names such
    # as 'a' and return them without the trailing separator.
    if p == os.sep or (os.altsep and p == os.altsep):
        return path
    else:
        return path + os.sep
198
201
# forward two methods from posixpath that do what we need, but we'd
# rather not let our internals know that we're thinking in posix terms
# - instead we'll let them be oblivious.
join = posixpath.join
dirname = posixpath.dirname
General Comments 0
You need to be logged in to leave comments. Login now