##// END OF EJS Templates
pathauditor: add a way to skip file system check...
Pierre-Yves David -
r27232:79a86a95 default
parent child Browse files
Show More
@@ -1,206 +1,214 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import errno
3 import errno
4 import os
4 import os
5 import posixpath
5 import posixpath
6 import stat
6 import stat
7
7
8 from .i18n import _
8 from .i18n import _
9 from . import (
9 from . import (
10 encoding,
10 encoding,
11 error,
11 error,
12 util,
12 util,
13 )
13 )
14
14
def _lowerclean(s):
    '''return s lower-cased and passed through encoding.hfsignoreclean()

    The result is what the path checks in this module compare against
    reserved names such as '.hg'.
    '''
    return encoding.hfsignoreclean(s.lower())
17
17
class pathauditor(object):
    '''ensure that a filesystem path contains no banned components.
    the following properties of a path are checked:

    - ends with a directory separator
    - under top-level .hg
    - starts at the root of a windows drive
    - contains ".."

    More checks are also done about the file system state:
    - traverses a symlink (e.g. a/symlink_here/b)
    - inside a nested repository (a callback can be used to approve
      some nested repositories, e.g., subrepositories)

    The file system checks are only done when 'realfs' is set to True (the
    default). They should be disabled when we are auditing paths for
    operations on stored history.
    '''

    def __init__(self, root, callback=None, realfs=True):
        '''create an auditor for paths relative to root

        root is the directory that audited prefixes are joined to for the
        file system checks.

        callback, when given, is called with the full path of a directory
        that contains a '.hg' directory; a true result approves that
        nested repository.

        realfs selects whether the file system backed checks are run.
        '''
        # caches of (case-normalized) paths and directory prefixes that
        # already passed the audit
        self.audited = set()
        self.auditeddir = set()
        self.root = root
        self._realfs = realfs
        self.callback = callback
        # NOTE(review): util.checkcase() presumably reports whether the
        # file system is case sensitive -- confirm against util
        if os.path.lexists(root) and not util.checkcase(root):
            self.normcase = util.normcase
        else:
            self.normcase = lambda x: x

    def __call__(self, path):
        '''Check the relative path.
        path may contain a pattern (e.g. foodir/**.txt)

        Raises error.Abort if the path is rejected. Returns None otherwise.
        '''

        path = util.localpath(path)
        normpath = self.normcase(path)
        if normpath in self.audited:
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise error.Abort(_("path ends in directory separator: %s") % path)
        parts = util.splitpath(path)
        if (os.path.splitdrive(path)[0]
            or _lowerclean(parts[0]) in ('.hg', '.hg.', '')
            or os.pardir in parts):
            raise error.Abort(_("path contains illegal component: %s") % path)
        # Windows shortname aliases
        for p in parts:
            if "~" in p:
                first, last = p.split("~", 1)
                if last.isdigit() and first.upper() in ["HG", "HG8B6C"]:
                    raise error.Abort(_("path contains illegal component: %s")
                                     % path)
        if '.hg' in _lowerclean(path):
            # _lowerclean() already lower-cases its argument
            lparts = [_lowerclean(p) for p in parts]
            for p in '.hg', '.hg.':
                # the first component was already rejected above, so only
                # the remaining ones can name a nested repository
                if p in lparts[1:]:
                    pos = lparts.index(p)
                    base = os.path.join(*parts[:pos])
                    raise error.Abort(_("path '%s' is inside nested repo %r")
                                      % (path, base))

        normparts = util.splitpath(normpath)
        assert len(parts) == len(normparts)

        # walk the directory prefixes from the longest to the shortest,
        # stopping at the first one that is already known to be good
        parts.pop()
        normparts.pop()
        prefixes = []
        while parts:
            prefix = os.sep.join(parts)
            normprefix = os.sep.join(normparts)
            if normprefix in self.auditeddir:
                break
            if self._realfs:
                self._checkfs(prefix, path)
            prefixes.append(normprefix)
            parts.pop()
            normparts.pop()

        self.audited.add(normpath)
        # only add prefixes to the cache after checking everything: we don't
        # want to add "foo/bar/baz" before checking if there's a "foo/.hg"
        self.auditeddir.update(prefixes)

    def _checkfs(self, prefix, path):
        """raise exception if a file system backed check fails

        prefix is a directory prefix of path, relative to self.root. An
        error.Abort is raised if prefix is a symbolic link, or if it is a
        directory containing a '.hg' directory that the callback does not
        approve.
        """
        curpath = os.path.join(self.root, prefix)
        try:
            st = os.lstat(curpath)
        except OSError as err:
            # EINVAL can be raised as invalid path syntax under win32.
            # They must be ignored for patterns can be checked too.
            if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                raise
        else:
            if stat.S_ISLNK(st.st_mode):
                raise error.Abort(
                    _('path %r traverses symbolic link %r')
                    % (path, prefix))
            elif (stat.S_ISDIR(st.st_mode) and
                  os.path.isdir(os.path.join(curpath, '.hg'))):
                if not self.callback or not self.callback(curpath):
                    raise error.Abort(_("path '%s' is inside nested "
                                        "repo %r") % (path, prefix))

    def check(self, path):
        '''return True if path passes the audit, False otherwise

        OSError and error.Abort raised by the audit are turned into a
        False result.
        '''
        try:
            self(path)
            return True
        except (OSError, error.Abort):
            return False
121
129
def canonpath(root, cwd, myname, auditor=None):
    '''return the canonical path of myname, given cwd and root

    A relative myname is joined to root and cwd before being normalized.
    The result is relative to root, audited with auditor (a pathauditor
    for root is created when none is given) and converted with
    util.pconvert(). '' is returned when myname is root itself.

    Raises error.Abort if myname is not under root, or if the auditor
    rejects the resulting path.
    '''
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + os.sep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)
    if name != rootsep and name.startswith(rootsep):
        name = name[len(rootsep):]
        auditor(name)
        return util.pconvert(name)
    elif name == root:
        return ''
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). The list
        # `rel' holds the reversed list of components making up the relative
        # file name we want.
        rel = []
        while True:
            try:
                s = util.samefile(name, root)
            except OSError:
                s = False
            if s:
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return ''
                rel.reverse()
                name = os.path.join(*rel)
                auditor(name)
                return util.pconvert(name)
            head, tail = util.split(name)
            rel.append(tail)
            if head == name:
                break
            name = head

        # A common mistake is to use -R, but specify a file relative to the
        # repo instead of cwd. Detect that case, and provide a hint to the
        # user.
        hint = None
        try:
            if cwd != root:
                canonpath(root, root, myname, auditor)
                try:
                    relroot = os.path.relpath(root, cwd)
                except ValueError:
                    # os.path.relpath() raises ValueError on Windows when
                    # root and cwd are on different drives; the absolute
                    # root is still a valid --cwd value
                    relroot = root
                hint = (_("consider using '--cwd %s'") % relroot)
        except error.Abort:
            pass

        raise error.Abort(_("%s not under root '%s'") % (myname, root),
                          hint=hint)
179
187
def normasprefix(path):
    '''normalize the specified path as path prefix

    Returned value can be used safely for "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, this expects "path" argument to be already
    normalized by "os.path.normpath", "os.path.realpath", and so on.

    See also issue3033 for detail about need of this function.

    >>> normasprefix('/foo/bar').replace(os.sep, '/')
    '/foo/bar/'
    >>> normasprefix('/').replace(os.sep, '/')
    '/'
    >>> normasprefix('a').replace(os.sep, '/')
    'a/'
    '''
    d, p = os.path.splitdrive(path)
    # Only a path that already ends with the separator (the root of a
    # normalized path) must be left alone. Comparing lengths instead would
    # also leave any one-character name such as 'a' without a separator.
    if p.endswith(os.sep):
        return path
    return path + os.sep
201
209
# forward two functions from posixpath that do what we need, but we'd
# rather not let our internals know that we're thinking in posix terms
# - instead we'll let them be oblivious.
join = posixpath.join
dirname = posixpath.dirname
General Comments 0
You need to be logged in to leave comments. Login now