##// END OF EJS Templates
pathutil: add doctests for canonpath()...
Matt Harbison -
r34981:705d0f2b stable
parent child Browse files
Show More
@@ -1,223 +1,263 b''
1 1 from __future__ import absolute_import
2 2
3 3 import errno
4 4 import os
5 5 import posixpath
6 6 import stat
7 7
8 8 from .i18n import _
9 9 from . import (
10 10 encoding,
11 11 error,
12 12 pycompat,
13 13 util,
14 14 )
15 15
def _lowerclean(s):
    '''return ``s`` lowercased and then filtered by encoding.hfsignoreclean()

    Used by the auditor below to compare path components against the
    reserved ".hg" names.
    '''
    lowered = s.lower()
    return encoding.hfsignoreclean(lowered)
18 18
class pathauditor(object):
    '''ensure that a filesystem path contains no banned components.
    the following properties of a path are checked:

    - ends with a directory separator
    - under top-level .hg
    - starts at the root of a windows drive
    - contains ".."

    More checks are also done about the file system state:
    - traverses a symlink (e.g. a/symlink_here/b)
    - inside a nested repository (a callback can be used to approve
      some nested repositories, e.g., subrepositories)

    The file system checks are only done when 'realfs' is set to True (the
    default). They should be disabled when we are auditing paths for
    operations on stored history.

    If 'cached' is set to True, audited paths and sub-directories are cached.
    Be careful to not keep the cache of unmanaged directories for long because
    audited paths may be replaced with symlinks.
    '''

    def __init__(self, root, callback=None, realfs=True, cached=False):
        '''create an auditor for paths relative to ``root``

        ``callback``, if given, is called with the full path of a directory
        that contains a ".hg" directory; a true return value approves that
        nested repository.
        '''
        # normalized paths / directory prefixes that already passed the audit
        # (only populated when 'cached' is True)
        self.audited = set()
        self.auditeddir = set()
        self.root = root
        self._realfs = realfs
        self._cached = cached
        self.callback = callback
        # Only fold case when the root exists and its filesystem is reported
        # as case-insensitive; otherwise paths are used as cache keys as-is.
        if os.path.lexists(root) and not util.fscasesensitive(root):
            self.normcase = util.normcase
        else:
            self.normcase = lambda x: x

    def __call__(self, path, mode=None):
        '''Check the relative path.
        path may contain a pattern (e.g. foodir/**.txt)

        Returns None when the path is acceptable and raises error.Abort
        otherwise. ``mode`` is accepted but not used by this method.
        '''

        path = util.localpath(path)
        normpath = self.normcase(path)
        if normpath in self.audited:
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise error.Abort(_("path ends in directory separator: %s") % path)
        parts = util.splitpath(path)
        # Reject drive-qualified paths, paths whose first component is a
        # reserved ".hg" name or empty, and any path containing "..".
        if (os.path.splitdrive(path)[0]
            or _lowerclean(parts[0]) in ('.hg', '.hg.', '')
            or os.pardir in parts):
            raise error.Abort(_("path contains illegal component: %s") % path)
        # Windows shortname aliases
        for p in parts:
            if "~" in p:
                first, last = p.split("~", 1)
                if last.isdigit() and first.upper() in ["HG", "HG8B6C"]:
                    raise error.Abort(_("path contains illegal component: %s")
                                      % path)
        if '.hg' in _lowerclean(path):
            # _lowerclean() already lowercases, no need to do it twice
            lparts = [_lowerclean(p) for p in parts]
            for p in '.hg', '.hg.':
                if p in lparts[1:]:
                    # lparts[0] was rejected above, so pos is always >= 1
                    pos = lparts.index(p)
                    base = os.path.join(*parts[:pos])
                    raise error.Abort(_("path '%s' is inside nested repo %r")
                                      % (path, base))

        normparts = util.splitpath(normpath)
        assert len(parts) == len(normparts)

        # drop the final component: only the leading directories are checked
        parts.pop()
        normparts.pop()
        prefixes = []
        # It's important that we check the path parts starting from the root.
        # This means we won't accidentally traverse a symlink into some other
        # filesystem (which is potentially expensive to access).
        for i in range(len(parts)):
            prefix = pycompat.ossep.join(parts[:i + 1])
            normprefix = pycompat.ossep.join(normparts[:i + 1])
            if normprefix in self.auditeddir:
                continue
            if self._realfs:
                self._checkfs(prefix, path)
            prefixes.append(normprefix)

        if self._cached:
            self.audited.add(normpath)
            # only add prefixes to the cache after checking everything: we don't
            # want to add "foo/bar/baz" before checking if there's a "foo/.hg"
            self.auditeddir.update(prefixes)

    def _checkfs(self, prefix, path):
        """raise exception if a file system backed check fails

        ``prefix`` is the leading directory portion being examined and
        ``path`` is the full path under audit (used for error messages).
        """
        curpath = os.path.join(self.root, prefix)
        try:
            st = os.lstat(curpath)
        except OSError as err:
            # EINVAL can be raised as invalid path syntax under win32.
            # They must be ignored for patterns can be checked too.
            if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                raise
        else:
            if stat.S_ISLNK(st.st_mode):
                msg = _('path %r traverses symbolic link %r') % (path, prefix)
                raise error.Abort(msg)
            elif (stat.S_ISDIR(st.st_mode) and
                  os.path.isdir(os.path.join(curpath, '.hg'))):
                # a nested repository is fine only if the callback approves it
                if not self.callback or not self.callback(curpath):
                    msg = _("path '%s' is inside nested repo %r")
                    raise error.Abort(msg % (path, prefix))

    def check(self, path):
        '''return True if ``path`` passes the audit, False otherwise

        OSError and error.Abort raised by the audit are turned into False.
        '''
        try:
            self(path)
            return True
        except (OSError, error.Abort):
            return False
136 136
def canonpath(root, cwd, myname, auditor=None):
    '''return the canonical path of myname, given cwd and root

    A relative ``myname`` is joined onto ``root`` and ``cwd`` and the result
    is normalized. If it lies under ``root``, the root-relative part is
    audited with ``auditor`` (a ``pathauditor(root)`` is created when none is
    given) and returned after util.pconvert(). An empty string is returned
    when the name is the root itself. Otherwise error.Abort is raised,
    with a "--cwd" hint when the name would have resolved relative to root.

    >>> def check(root, cwd, myname):
    ...     a = pathauditor(root, realfs=False)
    ...     try:
    ...         return canonpath(root, cwd, myname, a)
    ...     except error.Abort:
    ...         return 'aborted'
    >>> def unixonly(root, cwd, myname, expected='aborted'):
    ...     if pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> def winonly(root, cwd, myname, expected='aborted'):
    ...     if not pycompat.iswindows:
    ...         return expected
    ...     return check(root, cwd, myname)
    >>> winonly(b'd:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\dir', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'filename')
    'aborted'
    >>> winonly(b'c:\\\\repo', b'c:\\\\', b'repo\\\\filename',
    ...         b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo', b'filename', b'filename')
    'filename'
    >>> winonly(b'c:\\\\repo', b'c:\\\\repo\\\\subdir', b'filename',
    ...         b'subdir/filename')
    'subdir/filename'
    >>> unixonly(b'/repo', b'/dir', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'filename')
    'aborted'
    >>> unixonly(b'/repo', b'/', b'repo/filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo', b'filename', b'filename')
    'filename'
    >>> unixonly(b'/repo', b'/repo/subdir', b'filename', b'subdir/filename')
    'subdir/filename'
    '''
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + pycompat.ossep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)
    if name != rootsep and name.startswith(rootsep):
        # fast path: plain string prefix match against the root
        name = name[len(rootsep):]
        auditor(name)
        return util.pconvert(name)
    elif name == root:
        return ''
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). The list
        # `rel' holds the reversed list of components making up the relative
        # file name we want.
        rel = []
        while True:
            try:
                s = util.samefile(name, root)
            except OSError:
                s = False
            if s:
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return ''
                rel.reverse()
                name = os.path.join(*rel)
                auditor(name)
                return util.pconvert(name)
            dirname, basename = util.split(name)
            rel.append(basename)
            if dirname == name:
                break
            name = dirname

        # A common mistake is to use -R, but specify a file relative to the repo
        # instead of cwd. Detect that case, and provide a hint to the user.
        hint = None
        try:
            if cwd != root:
                canonpath(root, root, myname, auditor)
                relpath = util.pathto(root, cwd, '')
                # endswith() instead of relpath[-1]: it is safe on an empty
                # string, and indexing bytes on Python 3 yields an int that
                # would never compare equal to the separator.
                if relpath.endswith(pycompat.ossep):
                    relpath = relpath[:-1]
                hint = (_("consider using '--cwd %s'") % relpath)
        except error.Abort:
            pass

        raise error.Abort(_("%s not under root '%s'") % (myname, root),
                          hint=hint)
196 236
def normasprefix(path):
    '''normalize the specified path as path prefix

    Returned value can be used safely for "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, this expects "path" argument to be already
    normalized by "os.path.normpath", "os.path.realpath", and so on.

    See also issue3033 for detail about need of this function.

    >>> normasprefix(b'/foo/bar').replace(pycompat.ossep, b'/')
    '/foo/bar/'
    >>> normasprefix(b'/').replace(pycompat.ossep, b'/')
    '/'
    '''
    # only the part after the drive matters; the drive itself is not needed
    p = os.path.splitdrive(path)[1]
    # A drive-less part exactly as long as the separator is taken to be the
    # bare root (see the b'/' doctest), which must not get a second separator.
    if len(p) != len(pycompat.ossep):
        return path + pycompat.ossep
    else:
        return path
218 258
# Re-export two posixpath helpers under module-local names. They do what we
# need, but the rest of our internals should stay oblivious to the fact that
# we're thinking in posix terms here.
join, dirname = posixpath.join, posixpath.dirname
General Comments 0
You need to be logged in to leave comments. Login now