##// END OF EJS Templates
pathutil: mark parent directories as audited as we go...
Martin von Zweigbergk -
r44656:51c86c61 default
parent child Browse files
Show More
@@ -1,342 +1,339 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import errno
3 import errno
4 import os
4 import os
5 import posixpath
5 import posixpath
6 import stat
6 import stat
7
7
8 from .i18n import _
8 from .i18n import _
9 from . import (
9 from . import (
10 encoding,
10 encoding,
11 error,
11 error,
12 policy,
12 policy,
13 pycompat,
13 pycompat,
14 util,
14 util,
15 )
15 )
16
16
# Alternative implementations looked up through the `policy` module.
# `rustdirs` may be None (it is tested against None before being used as
# the `dirs` implementation further down in this module).
rustdirs = policy.importrust('dirstate', 'Dirs')
parsers = policy.importmod('parsers')
19
19
20
20
def _lowerclean(s):
    '''return ``s`` lower-cased and passed through encoding.hfsignoreclean

    pathauditor uses the result to compare path components against
    ``.hg`` and ``.hg.``.
    '''
    return encoding.hfsignoreclean(s.lower())
23
23
24
24
class pathauditor(object):
    '''ensure that a filesystem path contains no banned components.
    the following properties of a path are checked:

    - ends with a directory separator
    - under top-level .hg
    - starts at the root of a windows drive
    - contains ".."

    More checks are also done about the file system states:
    - traverses a symlink (e.g. a/symlink_here/b)
    - inside a nested repository (a callback can be used to approve
      some nested repositories, e.g., subrepositories)

    The file system checks are only done when 'realfs' is set to True (the
    default). They should be disabled when we are auditing paths for
    operations on stored history.

    If 'cached' is set to True, audited paths and sub-directories are cached.
    Be careful to not keep the cache of unmanaged directories for long because
    audited paths may be replaced with symlinks.
    '''

    def __init__(self, root, callback=None, realfs=True, cached=False):
        '''create an auditor for paths relative to ``root``

        ``callback``, when set, is called with the full path of a directory
        that contains a ``.hg`` directory; a true return value approves it.
        '''
        # normalized full paths / directory prefixes that already passed
        # (only filled in when ``cached`` is true)
        self.audited = set()
        self.auditeddir = set()
        self.root = root
        self._realfs = realfs
        self._cached = cached
        self.callback = callback
        # cache keys are case-normalized only when root exists and its
        # filesystem is reported as not case sensitive
        if os.path.lexists(root) and not util.fscasesensitive(root):
            self.normcase = util.normcase
        else:
            self.normcase = lambda x: x

    def __call__(self, path, mode=None):
        '''Check the relative path.
        path may contain a pattern (e.g. foodir/**.txt)

        Raises error.Abort when a check fails. ``mode`` is accepted but not
        used by this method.'''

        path = util.localpath(path)
        normpath = self.normcase(path)
        if normpath in self.audited:
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise error.Abort(_(b"path ends in directory separator: %s") % path)
        parts = util.splitpath(path)
        if (
            os.path.splitdrive(path)[0]
            or _lowerclean(parts[0]) in (b'.hg', b'.hg.', b'')
            or pycompat.ospardir in parts
        ):
            raise error.Abort(_(b"path contains illegal component: %s") % path)
        # Windows shortname aliases
        for p in parts:
            if b"~" in p:
                first, last = p.split(b"~", 1)
                if last.isdigit() and first.upper() in [b"HG", b"HG8B6C"]:
                    raise error.Abort(
                        _(b"path contains illegal component: %s") % path
                    )
        if b'.hg' in _lowerclean(path):
            lparts = [_lowerclean(p) for p in parts]
            for p in b'.hg', b'.hg.':
                # the first component was already rejected above, so only
                # deeper components can name a nested repository
                if p in lparts[1:]:
                    pos = lparts.index(p)
                    base = os.path.join(*parts[:pos])
                    raise error.Abort(
                        _(b"path '%s' is inside nested repo %r")
                        % (path, pycompat.bytestr(base))
                    )

        normparts = util.splitpath(normpath)
        assert len(parts) == len(normparts)

        # drop the final component: only the parent directories are walked
        parts.pop()
        normparts.pop()
        # It's important that we check the path parts starting from the root.
        # This means we won't accidentally traverse a symlink into some other
        # filesystem (which is potentially expensive to access).
        for i in range(len(parts)):
            prefix = pycompat.ossep.join(parts[: i + 1])
            normprefix = pycompat.ossep.join(normparts[: i + 1])
            if normprefix in self.auditeddir:
                continue
            if self._realfs:
                self._checkfs(prefix, path)
            # mark the directory as audited as we go: it is only reached
            # after its own check above did not raise
            if self._cached:
                self.auditeddir.add(normprefix)

        if self._cached:
            self.audited.add(normpath)

    def _checkfs(self, prefix, path):
        """raise exception if a file system backed check fails

        ``prefix`` is the leading directory part of ``path`` being examined;
        ``path`` is only used in the error messages.
        """
        curpath = os.path.join(self.root, prefix)
        try:
            st = os.lstat(curpath)
        except OSError as err:
            # EINVAL can be raised as invalid path syntax under win32.
            # They must be ignored for patterns can be checked too.
            if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                raise
        else:
            if stat.S_ISLNK(st.st_mode):
                msg = _(b'path %r traverses symbolic link %r') % (
                    pycompat.bytestr(path),
                    pycompat.bytestr(prefix),
                )
                raise error.Abort(msg)
            elif stat.S_ISDIR(st.st_mode) and os.path.isdir(
                os.path.join(curpath, b'.hg')
            ):
                # a nested repository is tolerated only if the callback
                # exists and approves it
                if not self.callback or not self.callback(curpath):
                    msg = _(b"path '%s' is inside nested repo %r")
                    raise error.Abort(msg % (path, pycompat.bytestr(prefix)))

    def check(self, path):
        '''return True if ``path`` passes the audit, False if auditing it
        raises OSError or error.Abort'''
        try:
            self(path)
            return True
        except (OSError, error.Abort):
            return False
151
148
152
149
def canonpath(root, cwd, myname, auditor=None):
    '''return the canonical path of myname, given cwd and root

    The result is relative to root and converted with util.pconvert();
    b'' is returned when myname names root itself. The relative name is
    passed to ``auditor`` (a pathauditor(root) is created when none is
    given). error.Abort is raised when myname is not under root.

    >>> def check(root, cwd, myname):
    ...     a = pathauditor(root, realfs=False)
    ...     try:
    ...         return canonpath(root, cwd, myname, a)
    ...     except error.Abort:
    ...         return 'aborted'
    >>> def unixonly(root, cwd, myname, expected='aborted'):
    ...     if pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> def winonly(root, cwd, myname, expected='aborted'):
    ...     if not pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> winonly(b'd:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'repo\\\\filename',
    ...         b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo', b'filename', b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo\\\\subdir', b'filename',
    ...         b'subdir/filename')
    'subdir/filename'
    >>> unixonly(b'/repo', b'/dir', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'repo/filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo', b'filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo/subdir', b'filename', b'subdir/filename')
    'subdir/filename'
    '''
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + pycompat.ossep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)
    if name != rootsep and name.startswith(rootsep):
        # fast path: plain string prefix match against root
        name = name[len(rootsep) :]
        auditor(name)
        return util.pconvert(name)
    elif name == root:
        return b''
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). The list
        # `rel' holds the reversed list of components making up the relative
        # file name we want.
        rel = []
        while True:
            try:
                s = util.samefile(name, root)
            except OSError:
                s = False
            if s:
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return b''
                rel.reverse()
                name = os.path.join(*rel)
                auditor(name)
                return util.pconvert(name)
            # local names chosen so the module-level `dirname` alias is not
            # shadowed inside this function
            head, tail = util.split(name)
            rel.append(tail)
            if head == name:
                break
            name = head

        # A common mistake is to use -R, but specify a file relative to the repo
        # instead of cwd. Detect that case, and provide a hint to the user.
        hint = None
        try:
            if cwd != root:
                # only the success of this call matters: if it aborts, no
                # hint is offered
                canonpath(root, root, myname, auditor)
                relpath = util.pathto(root, cwd, b'')
                if relpath.endswith(pycompat.ossep):
                    relpath = relpath[:-1]
                hint = _(b"consider using '--cwd %s'") % relpath
        except error.Abort:
            pass

        raise error.Abort(
            _(b"%s not under root '%s'") % (myname, root), hint=hint
        )
253
250
254
251
def normasprefix(path):
    '''normalize the specified path as path prefix

    Returned value can be used safely for "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, this expects "path" argument to be already
    normalized by "os.path.normpath", "os.path.realpath", and so on.

    See also issue3033 for detail about need of this function.

    >>> normasprefix(b'/foo/bar').replace(pycompat.ossep, b'/')
    '/foo/bar/'
    >>> normasprefix(b'/').replace(pycompat.ossep, b'/')
    '/'
    '''
    _drive, rest = os.path.splitdrive(path)
    # a drive-less remainder exactly as long as the separator is returned
    # unchanged (see the b'/' example above); anything else gets a
    # separator appended
    if len(rest) != len(pycompat.ossep):
        return path + pycompat.ossep
    else:
        return path
276
273
277
274
def finddirs(path):
    '''yield every ancestor directory of ``path``, deepest first

    ``path`` is a bytes path using b'/' as separator. Each prefix ending
    just before a b'/' is yielded, from the longest to the shortest, and
    b'' is always yielded last.
    '''
    pos = path.rfind(b'/')
    while pos != -1:
        yield path[:pos]
        # continue with the previous separator, left of the current one
        pos = path.rfind(b'/', 0, pos)
    yield b''
284
281
285
282
class dirs(object):
    '''a multiset of directory names from a set of file paths'''

    def __init__(self, map, skip=None):
        '''fill the multiset from the paths in ``map``

        When ``skip`` is given, ``map`` must be a dict; entries whose
        value's first item equals ``skip`` are left out. Passing ``skip``
        with any other kind of source raises error.ProgrammingError.
        '''
        self._dirs = {}
        addpath = self.addpath
        if isinstance(map, dict) and skip is not None:
            for f, s in pycompat.iteritems(map):
                if s[0] != skip:
                    addpath(f)
        elif skip is not None:
            raise error.ProgrammingError(
                b"skip character is only supported with a dict source"
            )
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        '''count the ancestor directories of ``path``

        Raises ValueError when ``path`` contains consecutive slashes.
        '''
        counts = self._dirs
        for base in finddirs(path):
            if base.endswith(b'/'):
                raise ValueError(
                    "found invalid consecutive slashes in path: %r" % base
                )
            if base in counts:
                # stop at the first directory already present; its own
                # ancestors are left untouched
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        '''undo one addpath(path) call

        Raises KeyError when an ancestor directory of ``path`` is not
        present.
        '''
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                # still referenced: decrement and leave ancestors untouched
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
329
326
330
327
# Replace the pure-Python `dirs` class above with the implementation
# exported by the `parsers` module when it has one, and then with
# `rustdirs` when that is available; being assigned last, `rustdirs`
# takes precedence if both exist.
if util.safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

if rustdirs is not None:
    dirs = rustdirs
336
333
337
334
# forward two methods from posixpath that do what we need, but we'd
# rather not let our internals know that we're thinking in posix terms
# - instead we'll let them be oblivious.
join = posixpath.join
dirname = posixpath.dirname
General Comments 0
You need to be logged in to leave comments. Login now