##// END OF EJS Templates
pathutil: add dirname and join functions...
Durham Goode -
r25281:660b178f default
parent child Browse files
Show More
@@ -1,189 +1,254
1 import os, errno, stat
1 import os, errno, stat
2
2
3 import encoding
3 import encoding
4 import util
4 import util
5 from i18n import _
5 from i18n import _
6
6
def _lowerclean(s):
    '''return s lowercased and then run through encoding.hfsignoreclean

    NOTE(review): hfsignoreclean presumably strips the characters HFS+
    ignores in file names - confirm against the encoding module.
    '''
    lowered = s.lower()
    return encoding.hfsignoreclean(lowered)
9
9
class pathauditor(object):
    '''ensure that a filesystem path contains no banned components.
    the following properties of a path are checked:

    - ends with a directory separator
    - under top-level .hg
    - starts at the root of a windows drive
    - contains ".."
    - traverses a symlink (e.g. a/symlink_here/b)
    - inside a nested repository (a callback can be used to approve
      some nested repositories, e.g., subrepositories)

    Instances are callable: auditor(path) raises util.Abort when the path
    is rejected and returns None otherwise. Paths and directory prefixes
    that passed are cached on the instance so they are not checked again.
    '''

    def __init__(self, root, callback=None):
        '''create an auditor for paths relative to root

        root is the directory that path prefixes are joined onto before
        they are lstat'ed. callback, if given, is called with the full
        path of a directory that contains '.hg'; a true result approves
        that nested repository.
        '''
        # paths (after self.normcase) that already passed every check
        self.audited = set()
        # directory prefixes (after self.normcase) that already passed
        self.auditeddir = set()
        self.root = root
        self.callback = callback
        # Cache keys are folded with util.normcase only when root exists and
        # util.checkcase(root) is false; otherwise they are used verbatim.
        # NOTE(review): checkcase presumably reports whether the filesystem
        # is case sensitive - confirm in util.
        if os.path.lexists(root) and not util.checkcase(root):
            self.normcase = util.normcase
        else:
            self.normcase = lambda x: x

    def __call__(self, path):
        '''Check the relative path.
        path may contain a pattern (e.g. foodir/**.txt)

        Raises util.Abort if the path is rejected; an OSError from lstat
        other than ENOENT, ENOTDIR or EINVAL is propagated. Returns None
        when the path is acceptable.'''

        path = util.localpath(path)
        normpath = self.normcase(path)
        # already audited successfully: nothing to do
        if normpath in self.audited:
            return
        # AIX ignores "/" at end of path, others raise EISDIR.
        if util.endswithsep(path):
            raise util.Abort(_("path ends in directory separator: %s") % path)
        parts = util.splitpath(path)
        # reject a drive prefix, a first component that is '.hg', '.hg.' or
        # empty after _lowerclean, and any os.pardir component
        if (os.path.splitdrive(path)[0]
            or _lowerclean(parts[0]) in ('.hg', '.hg.', '')
            or os.pardir in parts):
            raise util.Abort(_("path contains illegal component: %s") % path)
        # Windows shortname aliases
        # (a component of the form HG~<digits> or HG8B6C~<digits>, compared
        # case-insensitively, is rejected wherever it appears in the path)
        for p in parts:
            if "~" in p:
                first, last = p.split("~", 1)
                if last.isdigit() and first.upper() in ["HG", "HG8B6C"]:
                    raise util.Abort(_("path contains illegal component: %s")
                                     % path)
        # a '.hg' or '.hg.' component below the first one means the path
        # points into a nested repository; the substring test on the whole
        # path is a cheap pre-filter before cleaning every component
        if '.hg' in _lowerclean(path):
            lparts = [_lowerclean(p.lower()) for p in parts]
            for p in '.hg', '.hg.':
                if p in lparts[1:]:
                    pos = lparts.index(p)
                    # everything before the '.hg' component is the nested
                    # repository's directory
                    base = os.path.join(*parts[:pos])
                    raise util.Abort(_("path '%s' is inside nested repo %r")
                                     % (path, base))

        normparts = util.splitpath(normpath)
        assert len(parts) == len(normparts)

        # drop the final component: only the directory prefixes are examined
        # on disk, from the longest prefix down to the shortest
        parts.pop()
        normparts.pop()
        prefixes = []
        while parts:
            prefix = os.sep.join(parts)
            normprefix = os.sep.join(normparts)
            # this prefix was audited before, so the loop stops here and the
            # shorter prefixes are not examined
            if normprefix in self.auditeddir:
                break
            curpath = os.path.join(self.root, prefix)
            try:
                st = os.lstat(curpath)
            except OSError, err:
                # EINVAL can be raised as invalid path syntax under win32.
                # They must be ignored for patterns can be checked too.
                if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                    raise
            else:
                if stat.S_ISLNK(st.st_mode):
                    raise util.Abort(
                        _('path %r traverses symbolic link %r')
                        % (path, prefix))
                elif (stat.S_ISDIR(st.st_mode) and
                      os.path.isdir(os.path.join(curpath, '.hg'))):
                    # a directory holding '.hg' is a nested repository; it is
                    # only allowed when a callback exists and approves it
                    if not self.callback or not self.callback(curpath):
                        raise util.Abort(_("path '%s' is inside nested "
                                           "repo %r")
                                         % (path, prefix))
            prefixes.append(normprefix)
            parts.pop()
            normparts.pop()

        self.audited.add(normpath)
        # only add prefixes to the cache after checking everything: we don't
        # want to add "foo/bar/baz" before checking if there's a "foo/.hg"
        self.auditeddir.update(prefixes)

    def check(self, path):
        '''return True if path passes the audit, False if auditing it
        raised OSError or util.Abort'''
        try:
            self(path)
            return True
        except (OSError, util.Abort):
            return False
110
110
def canonpath(root, cwd, myname, auditor=None):
    '''return the canonical path of myname, given cwd and root

    A relative myname is joined onto root and cwd; the result is normalized
    with os.path.normpath. The returned value is the path relative to root,
    passed through util.pconvert, or '' when the name is root itself. The
    relative path is checked with auditor (a pathauditor(root) is created
    when none is given) before it is returned.

    Raises util.Abort if the name is not under root, or if the auditor
    rejects the relative path.
    '''
    # root with exactly one trailing separator, for the prefix test below
    if util.endswithsep(root):
        rootsep = root
    else:
        rootsep = root + os.sep
    name = myname
    if not os.path.isabs(name):
        name = os.path.join(root, cwd, name)
    name = os.path.normpath(name)
    if auditor is None:
        auditor = pathauditor(root)
    if name != rootsep and name.startswith(rootsep):
        # fast path: name is textually below root
        name = name[len(rootsep):]
        auditor(name)
        return util.pconvert(name)
    elif name == root:
        return ''
    else:
        # Determine whether `name' is in the hierarchy at or beneath `root',
        # by iterating name=dirname(name) until that causes no change (can't
        # check name == '/', because that doesn't work on windows). The list
        # `rel' holds the reversed list of components making up the relative
        # file name we want.
        rel = []
        while True:
            try:
                s = util.samefile(name, root)
            except OSError:
                # a failing comparison is treated as "not the same file"
                s = False
            if s:
                if not rel:
                    # name was actually the same as root (maybe a symlink)
                    return ''
                rel.reverse()
                name = os.path.join(*rel)
                auditor(name)
                return util.pconvert(name)
            # NOTE: the local `dirname' shadows the module-level dirname()
            # function inside this function only.
            dirname, basename = util.split(name)
            rel.append(basename)
            if dirname == name:
                # splitting no longer changes the name: root was never found
                break
            name = dirname

        # A common mistake is to use -R, but specify a file relative to the repo
        # instead of cwd. Detect that case, and provide a hint to the user.
        hint = None
        try:
            if cwd != root:
                # retry with cwd == root; reaching the next line without an
                # Abort means myname resolves when taken relative to root
                canonpath(root, root, myname, auditor)
                hint = (_("consider using '--cwd %s'")
                        % os.path.relpath(root, cwd))
        except util.Abort:
            pass

        raise util.Abort(_("%s not under root '%s'") % (myname, root),
                         hint=hint)
168
168
def normasprefix(path):
    '''normalize the specified path as path prefix

    Returned value can be used safely for "p.startswith(prefix)",
    "p[len(prefix):]", and so on.

    For efficiency, this expects "path" argument to be already
    normalized by "os.path.normpath", "os.path.realpath", and so on.

    The result always ends with exactly the separator it needs: a path
    whose non-drive part already ends with a directory separator (a root
    such as '/', or a path that was passed in with a trailing separator)
    is returned unchanged, anything else gets os.sep appended.

    See also issue3033 for detail about need of this function.

    >>> normasprefix('/foo/bar').replace(os.sep, '/')
    '/foo/bar/'
    >>> normasprefix('/').replace(os.sep, '/')
    '/'
    >>> normasprefix('a').replace(os.sep, '/')
    'a/'
    '''
    # only the part after the drive decides whether a separator is needed
    p = os.path.splitdrive(path)[1]
    # Test the last character itself rather than the length of p: a length
    # comparison mistakes every one-character name for a root and appends a
    # second separator to paths that already end with one.
    seps = os.sep + (os.altsep or '')
    if p and p[-1] in seps:
        return path
    return path + os.sep
190
def join(path, *paths):
    '''Join two or more pathname components, inserting '/' as needed.

    Based on the posix os.path.join() implementation. A component that
    starts with '/' discards everything joined so far. The separator has
    the same string type as path, so byte strings can be joined as well
    as text strings; all components must be of the same type as path.

    >>> join('foo', 'bar')
    'foo/bar'
    >>> join('/foo', 'bar')
    '/foo/bar'
    >>> join('foo', '/bar')
    '/bar'
    >>> join('foo', 'bar/')
    'foo/bar/'
    >>> join('foo', 'bar', 'gah')
    'foo/bar/gah'
    >>> join('foo')
    'foo'
    >>> join('', 'foo')
    'foo'
    >>> join('foo/', 'bar')
    'foo/bar'
    >>> join('', '', '')
    ''
    >>> join('foo', '', '', 'bar')
    'foo/bar'
    '''
    sep = '/'
    if not isinstance(path, str):
        # path is not a native str (e.g. bytes on Python 3): use a
        # separator of the matching type so concatenation keeps working
        sep = sep.encode('ascii')
    if not paths:
        # Deliberate no-op concatenation (same trick as posixpath.join):
        # it raises TypeError for a path of an incompatible type even when
        # there is nothing to join.
        path[:0] + sep
    for piece in paths:
        if piece.startswith(sep):
            # an absolute component restarts the result
            path = piece
        elif not path or path.endswith(sep):
            path += piece
        else:
            path += sep + piece
    return path
228
def dirname(path):
    '''returns the directory portion of the given path

    Based on the posix os.path.split() implementation. Trailing '/'
    characters are stripped from the result unless it consists of
    nothing but '/'. The separator has the same string type as path, so
    byte strings are handled as well as text strings.

    >>> dirname('foo')
    ''
    >>> dirname('foo/')
    'foo'
    >>> dirname('foo/bar')
    'foo'
    >>> dirname('/foo')
    '/'
    >>> dirname('/foo/bar')
    '/foo'
    >>> dirname('/foo//bar/poo')
    '/foo//bar'
    >>> dirname('/foo//bar')
    '/foo'
    '''
    sep = '/'
    if not isinstance(path, str):
        # path is not a native str (e.g. bytes on Python 3): use a
        # separator of the matching type
        sep = sep.encode('ascii')
    # keep everything up to and including the last separator; rfind()
    # returns -1 when there is none, which makes the slice empty
    head = path[:path.rfind(sep) + 1]
    # strip trailing separators, but leave a head made only of separators
    # (the root) untouched
    if head and head != sep * len(head):
        head = head.rstrip(sep)
    return head
General Comments 0
You need to be logged in to leave comments. Login now