##// END OF EJS Templates
pathutil: add doctests for canonpath()...
Matt Harbison -
r34981:705d0f2b stable
parent child Browse files
Show More
@@ -1,223 +1,263
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import errno
3 import errno
4 import os
4 import os
5 import posixpath
5 import posixpath
6 import stat
6 import stat
7
7
8 from .i18n import _
8 from .i18n import _
9 from . import (
9 from . import (
10 encoding,
10 encoding,
11 error,
11 error,
12 pycompat,
12 pycompat,
13 util,
13 util,
14 )
14 )
15
15
def _lowerclean(s):
    '''lowercase ``s`` and run the result through encoding.hfsignoreclean()'''
    lowered = s.lower()
    return encoding.hfsignoreclean(lowered)
18
18
class pathauditor(object):
    '''audit a filesystem path and refuse it if it has a banned component.

    A path is rejected when it:

    - ends with a directory separator
    - is under the top-level .hg
    - starts at the root of a windows drive
    - contains ".."

    More checks are also done about the file system state:
    - the path traverses a symlink (e.g. a/symlink_here/b)
    - the path is inside a nested repository (a callback can be used to
      approve some nested repositories, e.g., subrepositories)

    The file system checks are only done when 'realfs' is set to True (the
    default). They should be disabled when we are auditing paths for
    operations on stored history.

    If 'cached' is set to True, audited paths and sub-directories are cached.
    Be careful to not keep the cache of unmanaged directories for long because
    audited paths may be replaced with symlinks.
    '''

    def __init__(self, root, callback=None, realfs=True, cached=False):
        self.root = root
        self.callback = callback
        self._realfs = realfs
        self._cached = cached
        self.audited = set()
        self.auditeddir = set()
        # only normalize case when root exists and its file system is
        # reported as case-insensitive; otherwise paths are used untouched
        foldcase = os.path.lexists(root) and not util.fscasesensitive(root)
        self.normcase = util.normcase if foldcase else (lambda x: x)

    def __call__(self, path, mode=None):
        '''Check the relative path, raising error.Abort when it is refused.

        path may contain a pattern (e.g. foodir/**.txt). 'mode' is accepted
        but not looked at here.'''

        path = util.localpath(path)
        normpath = self.normcase(path)
        if normpath in self.audited:
            # already approved by an earlier (cached) audit
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise error.Abort(_("path ends in directory separator: %s") % path)

        parts = util.splitpath(path)
        illegal = (os.path.splitdrive(path)[0]
                   or _lowerclean(parts[0]) in ('.hg', '.hg.', '')
                   or os.pardir in parts)
        if illegal:
            raise error.Abort(_("path contains illegal component: %s") % path)

        # Windows shortname aliases
        for component in parts:
            if "~" not in component:
                continue
            stem, suffix = component.split("~", 1)
            if suffix.isdigit() and stem.upper() in ["HG", "HG8B6C"]:
                raise error.Abort(_("path contains illegal component: %s")
                                  % path)

        # a '.hg' component below the first one means a nested repository
        if '.hg' in _lowerclean(path):
            lowered = [_lowerclean(p.lower()) for p in parts]
            for banned in ('.hg', '.hg.'):
                if banned not in lowered[1:]:
                    continue
                where = lowered.index(banned)
                base = os.path.join(*parts[:where])
                raise error.Abort(_("path '%s' is inside nested repo %r")
                                  % (path, base))

        normparts = util.splitpath(normpath)
        assert len(parts) == len(normparts)

        # drop the final component: only the leading directories are walked
        parts.pop()
        normparts.pop()
        checked = []
        # It's important that we check the path parts starting from the root.
        # This means we won't accidentally traverse a symlink into some other
        # filesystem (which is potentially expensive to access).
        for depth in range(1, len(parts) + 1):
            prefix = pycompat.ossep.join(parts[:depth])
            normprefix = pycompat.ossep.join(normparts[:depth])
            if normprefix in self.auditeddir:
                continue
            if self._realfs:
                self._checkfs(prefix, path)
            checked.append(normprefix)

        if not self._cached:
            return
        self.audited.add(normpath)
        # only add prefixes to the cache after checking everything: we don't
        # want to add "foo/bar/baz" before checking if there's a "foo/.hg"
        self.auditeddir.update(checked)

    def _checkfs(self, prefix, path):
        """raise exception if a file system backed check fails

        'prefix' is the leading directory being inspected (joined onto
        self.root); 'path' is the full audited path, used only in messages.
        """
        target = os.path.join(self.root, prefix)
        try:
            st = os.lstat(target)
        except OSError as err:
            # EINVAL can be raised as invalid path syntax under win32.
            # They must be ignored for patterns can be checked too.
            if err.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                return
            raise

        if stat.S_ISLNK(st.st_mode):
            msg = _('path %r traverses symbolic link %r') % (path, prefix)
            raise error.Abort(msg)
        if not stat.S_ISDIR(st.st_mode):
            return
        if not os.path.isdir(os.path.join(target, '.hg')):
            return
        # a nested repository is tolerated only if the callback approves it
        if self.callback and self.callback(target):
            return
        msg = _("path '%s' is inside nested repo %r")
        raise error.Abort(msg % (path, prefix))

    def check(self, path):
        '''return True if auditing path succeeds, False if it raises
        OSError or error.Abort'''
        try:
            self(path)
        except (OSError, error.Abort):
            return False
        return True
136
136
def canonpath(root, cwd, myname, auditor=None):
    '''return the canonical path of myname, given cwd and root

    A relative myname is joined onto root and cwd and normalized. The part
    below root is audited (with 'auditor', or a fresh pathauditor(root) when
    none is given) and returned via util.pconvert(). '' is returned when the
    name is root itself; error.Abort is raised when it is not under root.

    >>> def check(root, cwd, myname):
    ...     a = pathauditor(root, realfs=False)
    ...     try:
    ...         return canonpath(root, cwd, myname, a)
    ...     except error.Abort:
    ...         return 'aborted'
    >>> def unixonly(root, cwd, myname, expected='aborted'):
    ...     if pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> def winonly(root, cwd, myname, expected='aborted'):
    ...     if not pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> winonly(b'd:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'repo\\\\filename',
    ...         b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo', b'filename', b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo\\\\subdir', b'filename',
    ...         b'subdir/filename')
    'subdir/filename'
    >>> unixonly(b'/repo', b'/dir', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'repo/filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo', b'filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo/subdir', b'filename', b'subdir/filename')
    'subdir/filename'
    '''
    rootsep = root if util.endswithsep(root) else root + pycompat.ossep

    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)

    if auditor is None:
        auditor = pathauditor(root)

    # fast path: name is textually below root
    if name.startswith(rootsep) and name != rootsep:
        relname = name[len(rootsep):]
        auditor(relname)
        return util.pconvert(relname)
    if name == root:
        return ''

    # Determine whether `name' is in the hierarchy at or beneath `root',
    # by iterating name=dirname(name) until that causes no change (can't
    # check name == '/', because that doesn't work on windows). The list
    # `components' holds the reversed list of components making up the
    # relative file name we want.
    components = []
    while True:
        try:
            atroot = util.samefile(name, root)
        except OSError:
            atroot = False
        if atroot:
            if not components:
                # name was actually the same as root (maybe a symlink)
                return ''
            relname = os.path.join(*reversed(components))
            auditor(relname)
            return util.pconvert(relname)
        parent, leaf = util.split(name)
        components.append(leaf)
        if parent == name:
            # reached the top without ever meeting root
            break
        name = parent

    # A common mistake is to use -R, but specify a file relative to the repo
    # instead of cwd. Detect that case, and provide a hint to the user.
    hint = None
    if cwd != root:
        try:
            canonpath(root, root, myname, auditor)
            relpath = util.pathto(root, cwd, '')
            if relpath[-1] == pycompat.ossep:
                relpath = relpath[:-1]
            hint = (_("consider using '--cwd %s'") % relpath)
        except error.Abort:
            pass

    raise error.Abort(_("%s not under root '%s'") % (myname, root),
                      hint=hint)
196
236
def normasprefix(path):
    '''normalize the specified path so it can serve as a path prefix

    The returned value is safe to use in "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, "path" is expected to have been normalized already
    by "os.path.normpath", "os.path.realpath", and so on.

    See also issue3033 for detail about need of this function.

    >>> normasprefix(b'/foo/bar').replace(pycompat.ossep, b'/')
    '/foo/bar/'
    >>> normasprefix(b'/').replace(pycompat.ossep, b'/')
    '/'
    '''
    tail = os.path.splitdrive(path)[1]
    if len(tail) == len(pycompat.ossep):
        # the part after the drive is as long as one separator: return as is
        return path
    return path + pycompat.ossep
218
258
# re-export two posixpath helpers that do exactly what we need; callers use
# these names so that our internals stay oblivious to the fact that we are
# thinking in posix terms.
join, dirname = posixpath.join, posixpath.dirname
General Comments 0
You need to be logged in to leave comments. Login now