##// END OF EJS Templates
pathutil: resurrect comment about path auditing order...
Yuya Nishihara -
r44834:d52e3826 default
parent child Browse files
Show More
@@ -1,339 +1,341 b''
1 1 from __future__ import absolute_import
2 2
3 3 import errno
4 4 import os
5 5 import posixpath
6 6 import stat
7 7
8 8 from .i18n import _
9 9 from . import (
10 10 encoding,
11 11 error,
12 12 policy,
13 13 pycompat,
14 14 util,
15 15 )
16 16
# Implementations looked up through the policy module. `rustdirs` may be
# None: further down it only replaces `dirs` when it is not None.
# NOTE(review): presumably importrust() yields None when the Rust extension
# is unavailable -- confirm against the policy module.
rustdirs = policy.importrust('dirstate', 'Dirs')
parsers = policy.importmod('parsers')
19 19
20 20
def _lowerclean(s):
    '''Lower-case s, then pass the result through encoding.hfsignoreclean.'''
    lowered = s.lower()
    return encoding.hfsignoreclean(lowered)
23 23
24 24
class pathauditor(object):
    '''Reject filesystem paths that contain banned components.

    The following lexical properties of a path are rejected:

    - it ends with a directory separator
    - it lies under the top-level .hg
    - it starts at the root of a windows drive
    - it contains ".."

    More checks are done against the state of the file system:

    - the path traverses a symlink (e.g. a/symlink_here/b)
    - the path is inside a nested repository (a callback can be used to
      approve some nested repositories, e.g., subrepositories)

    The file system checks are only done when 'realfs' is set to True (the
    default). They should be disabled when paths are audited for operations
    on stored history.

    If 'cached' is set to True, audited paths and sub-directories are cached.
    Be careful not to keep the cache of unmanaged directories for long,
    because audited paths may be replaced with symlinks.
    '''

    def __init__(self, root, callback=None, realfs=True, cached=False):
        self.root = root
        self.callback = callback
        self._realfs = realfs
        self._cached = cached
        self.audited = set()
        self.auditeddir = set()
        # Normalize case only when root exists and util.fscasesensitive()
        # reports it as not case sensitive; otherwise keep paths verbatim.
        foldcase = os.path.lexists(root) and not util.fscasesensitive(root)
        self.normcase = util.normcase if foldcase else (lambda x: x)

    def __call__(self, path, mode=None):
        '''Audit the relative path, raising error.Abort when it is banned.

        path may contain a pattern (e.g. foodir/**.txt). 'mode' is accepted
        but not consulted here.'''

        path = util.localpath(path)
        normpath = self.normcase(path)
        if normpath in self.audited:
            # already vetted (only populated when caching is enabled)
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise error.Abort(_(b"path ends in directory separator: %s") % path)
        parts = util.splitpath(path)
        if (
            os.path.splitdrive(path)[0]
            or _lowerclean(parts[0]) in (b'.hg', b'.hg.', b'')
            or pycompat.ospardir in parts
        ):
            raise error.Abort(_(b"path contains illegal component: %s") % path)
        # Windows shortname aliases
        for component in parts:
            head, tilde, tail = component.partition(b"~")
            if (
                tilde
                and tail.isdigit()
                and head.upper() in (b"HG", b"HG8B6C")
            ):
                raise error.Abort(
                    _(b"path contains illegal component: %s") % path
                )
        if b'.hg' in _lowerclean(path):
            lowered = [_lowerclean(component) for component in parts]
            for banned in (b'.hg', b'.hg.'):
                if banned in lowered[1:]:
                    pos = lowered.index(banned)
                    base = os.path.join(*parts[:pos])
                    raise error.Abort(
                        _(b"path '%s' is inside nested repo %r")
                        % (path, pycompat.bytestr(base))
                    )

        normparts = util.splitpath(normpath)
        assert len(parts) == len(normparts)

        # Only the leading directories are checked below; drop the last
        # component from both lists.
        parts.pop()
        normparts.pop()
        # It's important that we check the path parts starting from the root.
        # We don't want to add "foo/bar/baz" to auditeddir before checking if
        # there's a "foo/.hg" directory. This also means we won't accidentally
        # traverse a symlink into some other filesystem (which is potentially
        # expensive to access).
        for depth in range(1, len(parts) + 1):
            normprefix = pycompat.ossep.join(normparts[:depth])
            if normprefix in self.auditeddir:
                continue
            if self._realfs:
                self._checkfs(pycompat.ossep.join(parts[:depth]), path)
            if self._cached:
                self.auditeddir.add(normprefix)

        if self._cached:
            self.audited.add(normpath)

    def _checkfs(self, prefix, path):
        """raise error.Abort if 'prefix' fails a file system backed check

        'prefix' is a leading portion of 'path'; 'path' is only used in the
        error messages.
        """
        curpath = os.path.join(self.root, prefix)
        try:
            st = os.lstat(curpath)
        except OSError as err:
            # EINVAL can be raised as invalid path syntax under win32.
            # They must be ignored for patterns can be checked too.
            if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                raise
            return

        if stat.S_ISLNK(st.st_mode):
            msg = _(b'path %r traverses symbolic link %r') % (
                pycompat.bytestr(path),
                pycompat.bytestr(prefix),
            )
            raise error.Abort(msg)
        if stat.S_ISDIR(st.st_mode) and os.path.isdir(
            os.path.join(curpath, b'.hg')
        ):
            # a directory holding its own .hg is a nested repository unless
            # the callback approves it
            if not (self.callback and self.callback(curpath)):
                msg = _(b"path '%s' is inside nested repo %r")
                raise error.Abort(msg % (path, pycompat.bytestr(prefix)))

    def check(self, path):
        '''Return True if path passes the audit, False if it is rejected.'''
        try:
            self(path)
        except (OSError, error.Abort):
            return False
        return True
148 150
149 151
def canonpath(root, cwd, myname, auditor=None):
    '''return myname as a canonical, root-relative path, given cwd and root

    The result is audited with 'auditor' (a pathauditor for root is created
    when none is given) and error.Abort is raised if myname is not under
    root.

    >>> def check(root, cwd, myname):
    ...     a = pathauditor(root, realfs=False)
    ...     try:
    ...         return canonpath(root, cwd, myname, a)
    ...     except error.Abort:
    ...         return 'aborted'
    >>> def unixonly(root, cwd, myname, expected='aborted'):
    ...     if pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> def winonly(root, cwd, myname, expected='aborted'):
    ...     if not pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> winonly(b'd:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'repo\\\\filename',
    ...         b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo', b'filename', b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo\\\\subdir', b'filename',
    ...         b'subdir/filename')
    'subdir/filename'
    >>> unixonly(b'/repo', b'/dir', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'repo/filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo', b'filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo/subdir', b'filename', b'subdir/filename')
    'subdir/filename'
    '''
    rootsep = root if util.endswithsep(root) else root + pycompat.ossep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)

    # Fast path: name lies textually below root.
    if name != rootsep and name.startswith(rootsep):
        name = name[len(rootsep) :]
        auditor(name)
        return util.pconvert(name)
    if name == root:
        return b''

    # Determine whether `name' is in the hierarchy at or beneath `root',
    # by stepping up to the parent directory until that causes no change
    # (can't check name == '/', because that doesn't work on windows). The
    # list `rel' holds the reversed list of components making up the
    # relative file name we want.
    rel = []
    while True:
        try:
            atroot = util.samefile(name, root)
        except OSError:
            atroot = False
        if atroot:
            if not rel:
                # name was actually the same as root (maybe a symlink)
                return b''
            rel.reverse()
            name = os.path.join(*rel)
            auditor(name)
            return util.pconvert(name)
        parent, leaf = util.split(name)
        rel.append(leaf)
        if parent == name:
            break
        name = parent

    # A common mistake is to use -R, but specify a file relative to the repo
    # instead of cwd. Detect that case, and provide a hint to the user.
    hint = None
    try:
        if cwd != root:
            # raises error.Abort (so no hint) unless myname resolves when
            # interpreted relative to root
            canonpath(root, root, myname, auditor)
            relpath = util.pathto(root, cwd, b'')
            if relpath.endswith(pycompat.ossep):
                relpath = relpath[:-1]
            hint = _(b"consider using '--cwd %s'") % relpath
    except error.Abort:
        pass

    raise error.Abort(
        _(b"%s not under root '%s'") % (myname, root), hint=hint
    )
250 252
251 253
def normasprefix(path):
    '''return path in a form that is safe to use as a path prefix

    The result can be used safely for "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, "path" is expected to be normalized already, by
    "os.path.normpath", "os.path.realpath", and so on.

    See also issue3033 for detail about need of this function.

    >>> normasprefix(b'/foo/bar').replace(pycompat.ossep, b'/')
    '/foo/bar/'
    >>> normasprefix(b'/').replace(pycompat.ossep, b'/')
    '/'
    '''
    rest = os.path.splitdrive(path)[1]
    if len(rest) == len(pycompat.ossep):
        # the part after the drive is as long as one separator: returned
        # unchanged, no separator is appended
        return path
    return path + pycompat.ossep
273 275
274 276
def finddirs(path):
    '''yield every ancestor directory of path, deepest first, then b""

    Ancestors are found by cutting path at each b'/' from right to left.
    '''
    end = len(path)
    while True:
        end = path.rfind(b'/', 0, end)
        if end == -1:
            break
        yield path[:end]
    yield b''
281 283
282 284
class dirs(object):
    '''a multiset of directory names from a set of file paths'''

    def __init__(self, map, skip=None):
        '''count the directories of every path in map

        When 'skip' is given, map must be a dict; entries whose value starts
        with 'skip' (s[0] == skip) are left out.
        '''
        self._dirs = {}
        add = self.addpath
        if skip is None:
            for f in map:
                add(f)
        elif isinstance(map, dict):
            for f, s in pycompat.iteritems(map):
                if s[0] != skip:
                    add(f)
        else:
            raise error.ProgrammingError(
                b"skip character is only supported with a dict source"
            )

    def addpath(self, path):
        '''count path's directories, stopping at the first one already known'''
        counts = self._dirs
        for base in finddirs(path):
            if base.endswith(b'/'):
                raise ValueError(
                    "found invalid consecutive slashes in path: %r" % base
                )
            seen = counts.get(base)
            if seen is not None:
                # a directory already present only gets its count bumped;
                # directories above it are left untouched
                counts[base] = seen + 1
                return
            counts[base] = 1

    def delpath(self, path):
        '''undo addpath: drop one reference from path's directories'''
        counts = self._dirs
        for base in finddirs(path):
            remaining = counts[base] - 1
            if remaining > 0:
                # still referenced by other paths; directories above it are
                # left untouched
                counts[base] = remaining
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
326 328
327 329
# Rebind `dirs` to another implementation when one is available: the one
# exposed by `parsers` if it has a 'dirs' attribute, and then `rustdirs`
# whenever it is not None. The Rust one is assigned last, so it takes
# precedence over both the class above and the `parsers` one.
if util.safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

if rustdirs is not None:
    dirs = rustdirs
333 335
334 336
# Forward two functions from posixpath that do what we need, but we'd
# rather not let our internals know that we're thinking in posix terms
# - instead we'll let them be oblivious.
join = posixpath.join
dirname = posixpath.dirname
General Comments 0
You need to be logged in to leave comments. Login now