##// END OF EJS Templates
pathutil: mark parent directories as audited as we go...
Martin von Zweigbergk -
r44656:51c86c61 default
parent child Browse files
Show More
@@ -1,342 +1,339 b''
1 1 from __future__ import absolute_import
2 2
3 3 import errno
4 4 import os
5 5 import posixpath
6 6 import stat
7 7
8 8 from .i18n import _
9 9 from . import (
10 10 encoding,
11 11 error,
12 12 policy,
13 13 pycompat,
14 14 util,
15 15 )
16 16
# Optional accelerated implementations, resolved through the `policy` module.
# `rustdirs` may be None (it is tested against None at the bottom of this
# module before being used to replace the pure-Python `dirs` class).
rustdirs = policy.importrust('dirstate', 'Dirs')
parsers = policy.importmod('parsers')
19 19
20 20
def _lowerclean(s):
    '''return ``s`` lowercased and passed through encoding.hfsignoreclean

    Used by the path auditor to compare path components against ``.hg``
    regardless of case.
    NOTE(review): hfsignoreclean presumably strips characters that HFS+
    ignores in file names -- confirm against the encoding module.
    '''
    return encoding.hfsignoreclean(s.lower())
23 23
24 24
class pathauditor(object):
    '''ensure that a filesystem path contains no banned components.
    the following properties of a path are checked:

    - ends with a directory separator
    - under top-level .hg
    - starts at the root of a windows drive
    - contains ".."

    More checks are also done about the file system states:
    - traverses a symlink (e.g. a/symlink_here/b)
    - inside a nested repository (a callback can be used to approve
      some nested repositories, e.g., subrepositories)

    The file system checks are only done when 'realfs' is set to True (the
    default). They should be disabled when we are auditing paths for
    operations on stored history.

    If 'cached' is set to True, audited paths and sub-directories are cached.
    Be careful to not keep the cache of unmanaged directories for long because
    audited paths may be replaced with symlinks.
    '''

    def __init__(self, root, callback=None, realfs=True, cached=False):
        '''set up an auditor for paths relative to ``root``

        ``callback``, when given, is called with the full path of a directory
        that contains a ``.hg`` directory; a true return value approves that
        nested repository. ``realfs`` enables the file system checks done by
        ``_checkfs``. ``cached`` enables remembering audited paths and
        directory prefixes in ``self.audited`` / ``self.auditeddir``.
        '''
        # full (case-normalized) paths that already passed the audit
        self.audited = set()
        # (case-normalized) directory prefixes that already passed the audit
        self.auditeddir = set()
        self.root = root
        self._realfs = realfs
        self._cached = cached
        self.callback = callback
        # Only fold case for cache keys when the root exists and sits on a
        # case-insensitive file system; otherwise use paths unchanged.
        if os.path.lexists(root) and not util.fscasesensitive(root):
            self.normcase = util.normcase
        else:
            self.normcase = lambda x: x

    def __call__(self, path, mode=None):
        '''Check the relative path.
        path may contain a pattern (e.g. foodir/**.txt)

        Raises error.Abort when the path is rejected. ``mode`` is accepted
        but not used by this method.'''

        path = util.localpath(path)
        normpath = self.normcase(path)
        if normpath in self.audited:
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise error.Abort(_(b"path ends in directory separator: %s") % path)
        parts = util.splitpath(path)
        if (
            os.path.splitdrive(path)[0]
            or _lowerclean(parts[0]) in (b'.hg', b'.hg.', b'')
            or pycompat.ospardir in parts
        ):
            raise error.Abort(_(b"path contains illegal component: %s") % path)
        # Windows shortname aliases
        for p in parts:
            if b"~" in p:
                first, last = p.split(b"~", 1)
                if last.isdigit() and first.upper() in [b"HG", b"HG8B6C"]:
                    raise error.Abort(
                        _(b"path contains illegal component: %s") % path
                    )
        # Cheap substring test first; only split into cleaned components
        # when '.hg' can actually occur somewhere in the path.
        if b'.hg' in _lowerclean(path):
            lparts = [_lowerclean(p) for p in parts]
            for p in b'.hg', b'.hg.':
                # parts[0] was already rejected above, so only deeper
                # components can name a nested repository here.
                if p in lparts[1:]:
                    pos = lparts.index(p)
                    base = os.path.join(*parts[:pos])
                    raise error.Abort(
                        _(b"path '%s' is inside nested repo %r")
                        % (path, pycompat.bytestr(base))
                    )

        normparts = util.splitpath(normpath)
        assert len(parts) == len(normparts)

        # Drop the final component: only the parent directories are checked
        # as prefixes below.
        parts.pop()
        normparts.pop()
        # It's important that we check the path parts starting from the root.
        # This means we won't accidentally traverse a symlink into some other
        # filesystem (which is potentially expensive to access).
        for i in range(len(parts)):
            prefix = pycompat.ossep.join(parts[: i + 1])
            normprefix = pycompat.ossep.join(normparts[: i + 1])
            if normprefix in self.auditeddir:
                continue
            if self._realfs:
                self._checkfs(prefix, path)
            # Mark the prefix as audited as we go. This is safe because
            # prefixes are visited from the root down and _checkfs raises
            # before we get here if the prefix is rejected.
            if self._cached:
                self.auditeddir.add(normprefix)

        if self._cached:
            self.audited.add(normpath)

    def _checkfs(self, prefix, path):
        """raise exception if a file system backed check fails

        ``prefix`` is the directory prefix (relative to ``self.root``) being
        examined; ``path`` is the full relative path being audited and is
        only used in error messages.
        """
        curpath = os.path.join(self.root, prefix)
        try:
            st = os.lstat(curpath)
        except OSError as err:
            # EINVAL can be raised as invalid path syntax under win32.
            # They must be ignored for patterns can be checked too.
            if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                raise
        else:
            if stat.S_ISLNK(st.st_mode):
                msg = _(b'path %r traverses symbolic link %r') % (
                    pycompat.bytestr(path),
                    pycompat.bytestr(prefix),
                )
                raise error.Abort(msg)
            elif stat.S_ISDIR(st.st_mode) and os.path.isdir(
                os.path.join(curpath, b'.hg')
            ):
                # A nested repository is only tolerated when a callback
                # exists and approves this directory.
                if not self.callback or not self.callback(curpath):
                    msg = _(b"path '%s' is inside nested repo %r")
                    raise error.Abort(msg % (path, pycompat.bytestr(prefix)))

    def check(self, path):
        '''return True if ``path`` passes the audit, False otherwise

        OSError and error.Abort raised by the audit are turned into False.
        '''
        try:
            self(path)
            return True
        except (OSError, error.Abort):
            return False
151 148
152 149
def canonpath(root, cwd, myname, auditor=None):
    '''return the canonical path of myname, given cwd and root

    ``myname`` is joined onto ``root`` and ``cwd`` when it is not absolute,
    then normalized. The result is relative to ``root``, converted with
    util.pconvert, and has been passed through ``auditor`` (a pathauditor for
    ``root`` is created when none is given). b'' is returned when the name
    is the root itself. error.Abort is raised when the name is not under
    ``root``.

    >>> def check(root, cwd, myname):
    ...     a = pathauditor(root, realfs=False)
    ...     try:
    ...         return canonpath(root, cwd, myname, a)
    ...     except error.Abort:
    ...         return 'aborted'
    >>> def unixonly(root, cwd, myname, expected='aborted'):
    ...     if pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> def winonly(root, cwd, myname, expected='aborted'):
    ...     if not pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> winonly(b'd:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'repo\\\\filename',
    ...         b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo', b'filename', b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo\\\\subdir', b'filename',
    ...         b'subdir/filename')
    'subdir/filename'
    >>> unixonly(b'/repo', b'/dir', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'repo/filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo', b'filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo/subdir', b'filename', b'subdir/filename')
    'subdir/filename'
    '''
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + pycompat.ossep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)
    if name != rootsep and name.startswith(rootsep):
        # Fast path: purely textual match against the root prefix.
        name = name[len(rootsep) :]
        auditor(name)
        return util.pconvert(name)
    elif name == root:
        return b''
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). The list
        # `rel' holds the reversed list of components making up the relative
        # file name we want.
        rel = []
        while True:
            try:
                s = util.samefile(name, root)
            except OSError:
                s = False
            if s:
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return b''
                rel.reverse()
                name = os.path.join(*rel)
                auditor(name)
                return util.pconvert(name)
            # Local names deliberately differ from the module-level
            # `dirname` alias defined at the bottom of this file.
            parent, leaf = util.split(name)
            rel.append(leaf)
            if parent == name:
                break
            name = parent

        # A common mistake is to use -R, but specify a file relative to the repo
        # instead of cwd. Detect that case, and provide a hint to the user.
        hint = None
        try:
            if cwd != root:
                # Raises error.Abort (swallowed below) when the name is not
                # valid relative to the root either, leaving hint as None.
                canonpath(root, root, myname, auditor)
                relpath = util.pathto(root, cwd, b'')
                if relpath.endswith(pycompat.ossep):
                    relpath = relpath[:-1]
                hint = _(b"consider using '--cwd %s'") % relpath
        except error.Abort:
            pass

        raise error.Abort(
            _(b"%s not under root '%s'") % (myname, root), hint=hint
        )
253 250
254 251
def normasprefix(path):
    '''normalize the specified path as path prefix

    Returned value can be used safely for "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, this expects "path" argument to be already
    normalized by "os.path.normpath", "os.path.realpath", and so on.

    See also issue3033 for detail about need of this function.

    >>> normasprefix(b'/foo/bar').replace(pycompat.ossep, b'/')
    '/foo/bar/'
    >>> normasprefix(b'/').replace(pycompat.ossep, b'/')
    '/'
    '''
    # Only the part after the drive matters: when it is as long as a single
    # separator the path is returned unchanged, otherwise a separator is
    # appended.
    tail = os.path.splitdrive(path)[1]
    if len(tail) != len(pycompat.ossep):
        return path + pycompat.ossep
    else:
        return path
276 273
277 274
def finddirs(path):
    '''yield every ancestor directory of ``path``, deepest first

    ``path`` is a bytes path using b'/' as separator. Each prefix that ends
    just before a b'/' is yielded, from the longest to the shortest, and
    b'' is always yielded last.
    '''
    pos = path.rfind(b'/')
    while pos != -1:
        yield path[:pos]
        # continue searching strictly to the left of the slash just used
        pos = path.rfind(b'/', 0, pos)
    yield b''
284 281
285 282
class dirs(object):
    '''a multiset of directory names from a set of file paths'''

    def __init__(self, map, skip=None):
        '''build the multiset from the paths in ``map``

        When ``map`` is a dict and ``skip`` is given, entries whose value
        starts with ``skip`` (``value[0] == skip``) are left out. Passing
        ``skip`` with a non-dict source raises error.ProgrammingError.
        '''
        # directory name -> counter (see addpath/delpath for how it is kept)
        self._dirs = {}
        addpath = self.addpath
        if isinstance(map, dict) and skip is not None:
            for f, s in pycompat.iteritems(map):
                if s[0] != skip:
                    addpath(f)
        elif skip is not None:
            raise error.ProgrammingError(
                b"skip character is only supported with a dict source"
            )
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        '''record the ancestor directories of ``path``

        Ancestors are visited deepest first. The first one already present
        has its counter incremented and the walk stops there; ancestors not
        yet present are inserted with a counter of 1. Raises ValueError when
        an ancestor ends with b'/' (consecutive slashes in ``path``).
        '''
        counts = self._dirs
        for base in finddirs(path):
            if base.endswith(b'/'):
                raise ValueError(
                    "found invalid consecutive slashes in path: %r" % base
                )
            if base in counts:
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        '''undo a previous addpath of ``path``

        Ancestors are visited deepest first. The first one whose counter is
        greater than 1 is decremented and the walk stops there; ancestors
        with a counter of 1 are removed. A missing ancestor raises KeyError.
        '''
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
329 326
330 327
# Prefer an accelerated `dirs` implementation over the pure-Python class
# above when one is available: first the one exposed by the `parsers`
# module, then the Rust one, which wins when both exist.
if util.safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

if rustdirs is not None:
    dirs = rustdirs
336 333
337 334
# forward two methods from posixpath that do what we need, but we'd
# rather not let our internals know that we're thinking in posix terms
# - instead we'll let them be oblivious.
join = posixpath.join
dirname = posixpath.dirname
General Comments 0
You need to be logged in to leave comments. Login now