##// END OF EJS Templates
match: unnest functions in _matcher
Matt Mackall -
r8574:63a7ed21 default
parent child Browse files
Show More
@@ -1,266 +1,266
1 # match.py - file name matching
1 # match.py - file name matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 import util, re
8 import util, re
9
9
10 class _match(object):
10 class _match(object):
11 def __init__(self, root, cwd, files, mf, ap):
11 def __init__(self, root, cwd, files, mf, ap):
12 self._root = root
12 self._root = root
13 self._cwd = cwd
13 self._cwd = cwd
14 self._files = files
14 self._files = files
15 self._fmap = set(files)
15 self._fmap = set(files)
16 self.matchfn = mf
16 self.matchfn = mf
17 self._anypats = ap
17 self._anypats = ap
18 def __call__(self, fn):
18 def __call__(self, fn):
19 return self.matchfn(fn)
19 return self.matchfn(fn)
20 def __iter__(self):
20 def __iter__(self):
21 for f in self._files:
21 for f in self._files:
22 yield f
22 yield f
23 def bad(self, f, msg):
23 def bad(self, f, msg):
24 return True
24 return True
25 def dir(self, f):
25 def dir(self, f):
26 pass
26 pass
27 def missing(self, f):
27 def missing(self, f):
28 pass
28 pass
29 def exact(self, f):
29 def exact(self, f):
30 return f in self._fmap
30 return f in self._fmap
31 def rel(self, f):
31 def rel(self, f):
32 return util.pathto(self._root, self._cwd, f)
32 return util.pathto(self._root, self._cwd, f)
33 def files(self):
33 def files(self):
34 return self._files
34 return self._files
35 def anypats(self):
35 def anypats(self):
36 return self._anypats
36 return self._anypats
37
37
class always(_match):
    """matcher with no explicit files whose predicate accepts every name"""

    def __init__(self, root, cwd):
        def accept(f):
            return True
        _match.__init__(self, root, cwd, [], accept, False)
41
41
class never(_match):
    """matcher with no explicit files whose predicate rejects every name"""

    def __init__(self, root, cwd):
        def reject(f):
            return False
        _match.__init__(self, root, cwd, [], reject, False)
45
45
class exact(_match):
    """matcher that accepts exactly the listed files"""

    def __init__(self, root, cwd, files):
        # self.exact is the inherited membership test against the file set;
        # it is only called after _match.__init__ has built that set
        predicate = self.exact
        _match.__init__(self, root, cwd, files, predicate, False)
49
49
class match(_match):
    """matcher built from patterns plus optional include/exclude patterns"""

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob'):
        """build the matcher via _matcher()

        patterns - patterns to match
        include  - patterns to include
        exclude  - patterns to exclude
        default  - kind assumed for entries of patterns without a prefix
        """
        roots, matchfn, anypats = _matcher(root, cwd, patterns, include,
                                           exclude, default)
        _match.__init__(self, root, cwd, roots, matchfn, anypats)
55
55
def patkind(pat):
    """return the explicit kind prefix of pat, or None if it has none"""
    kind, _name = _patsplit(pat, None)
    return kind
58
58
59 def _patsplit(pat, default):
59 def _patsplit(pat, default):
60 """Split a string into an optional pattern kind prefix and the
60 """Split a string into an optional pattern kind prefix and the
61 actual pattern."""
61 actual pattern."""
62 for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
62 for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
63 if pat.startswith(prefix + ':'): return pat.split(':', 1)
63 if pat.startswith(prefix + ':'): return pat.split(':', 1)
64 return default, pat
64 return default, pat
65
65
66 _globchars = set('[{*?')
66 _globchars = set('[{*?')
67
67
68 def _globre(pat, head, tail):
68 def _globre(pat, head, tail):
69 "convert a glob pattern into a regexp"
69 "convert a glob pattern into a regexp"
70 i, n = 0, len(pat)
70 i, n = 0, len(pat)
71 res = ''
71 res = ''
72 group = 0
72 group = 0
73 def peek(): return i < n and pat[i]
73 def peek(): return i < n and pat[i]
74 while i < n:
74 while i < n:
75 c = pat[i]
75 c = pat[i]
76 i = i+1
76 i = i+1
77 if c == '*':
77 if c == '*':
78 if peek() == '*':
78 if peek() == '*':
79 i += 1
79 i += 1
80 res += '.*'
80 res += '.*'
81 else:
81 else:
82 res += '[^/]*'
82 res += '[^/]*'
83 elif c == '?':
83 elif c == '?':
84 res += '.'
84 res += '.'
85 elif c == '[':
85 elif c == '[':
86 j = i
86 j = i
87 if j < n and pat[j] in '!]':
87 if j < n and pat[j] in '!]':
88 j += 1
88 j += 1
89 while j < n and pat[j] != ']':
89 while j < n and pat[j] != ']':
90 j += 1
90 j += 1
91 if j >= n:
91 if j >= n:
92 res += '\\['
92 res += '\\['
93 else:
93 else:
94 stuff = pat[i:j].replace('\\','\\\\')
94 stuff = pat[i:j].replace('\\','\\\\')
95 i = j + 1
95 i = j + 1
96 if stuff[0] == '!':
96 if stuff[0] == '!':
97 stuff = '^' + stuff[1:]
97 stuff = '^' + stuff[1:]
98 elif stuff[0] == '^':
98 elif stuff[0] == '^':
99 stuff = '\\' + stuff
99 stuff = '\\' + stuff
100 res = '%s[%s]' % (res, stuff)
100 res = '%s[%s]' % (res, stuff)
101 elif c == '{':
101 elif c == '{':
102 group += 1
102 group += 1
103 res += '(?:'
103 res += '(?:'
104 elif c == '}' and group:
104 elif c == '}' and group:
105 res += ')'
105 res += ')'
106 group -= 1
106 group -= 1
107 elif c == ',' and group:
107 elif c == ',' and group:
108 res += '|'
108 res += '|'
109 elif c == '\\':
109 elif c == '\\':
110 p = peek()
110 p = peek()
111 if p:
111 if p:
112 i += 1
112 i += 1
113 res += re.escape(p)
113 res += re.escape(p)
114 else:
114 else:
115 res += re.escape(c)
115 res += re.escape(c)
116 else:
116 else:
117 res += re.escape(c)
117 res += re.escape(c)
118 return head + res + tail
118 return head + res + tail
119
119
120 def _matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
120 def _regex(kind, name, tail):
121 '''convert a pattern into a regular expression'''
122 if not name:
123 return ''
124 if kind == 're':
125 return name
126 elif kind == 'path':
127 return '^' + re.escape(name) + '(?:/|$)'
128 elif kind == 'relglob':
129 return _globre(name, '(?:|.*/)', tail)
130 elif kind == 'relpath':
131 return re.escape(name) + '(?:/|$)'
132 elif kind == 'relre':
133 if name.startswith('^'):
134 return name
135 return '.*' + name
136 return _globre(name, '', tail)
137
def _matchfn(pats, tail):
    """build a matching function from a set of patterns

    pats - list of (kind, pattern) pairs
    tail - regexp suffix handed to _regex() for each pattern

    Returns a callable taking a string. Normally this is the match method
    of one compiled regexp joining all patterns; if that regexp is too
    large, the pattern list is halved recursively and the halves are
    or-ed together.

    Raises util.Abort if a pattern does not compile, and OverflowError if
    a single pattern is too large on its own.
    """
    try:
        pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
        if len(pat) > 20000:
            raise OverflowError()
        return re.compile(pat).match
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(pats)
        if count < 2:
            raise
        # recurse through the module-level name; the old nested helper
        # name 'matchfn' no longer exists at this scope
        a = _matchfn(pats[:count // 2], tail)
        b = _matchfn(pats[count // 2:], tail)
        return lambda s: a(s) or b(s)
    except re.error:
        # find the offending pattern so the error can name it
        for k, p in pats:
            try:
                re.compile('(?:%s)' % _regex(k, p, tail))
            except re.error:
                raise util.Abort("invalid pattern (%s): %s" % (k, p))
        raise util.Abort("invalid pattern")
161
def _containsglob(name):
    """return True if name contains any glob character"""
    return bool(_globchars.intersection(name))
166
def _globprefix(pat):
    '''return the non-glob prefix of a path, e.g. foo/* -> foo

    '.' is returned when the prefix is empty.'''
    parts = pat.split('/')
    keep = len(parts)
    for idx, part in enumerate(parts):
        if _containsglob(part):
            # stop at the first component holding a glob character
            keep = idx
            break
    return '/'.join(parts[:keep]) or '.'
174
def _normalizepats(names, default, canonroot, cwd):
    """split and normalize a list of patterns

    names     - pattern strings, optionally prefixed with 'kind:'
    default   - kind assumed for patterns without a prefix
    canonroot - passed to util.canonpath() for 'glob' and 'relpath' patterns
    cwd       - passed to util.canonpath() for 'glob' and 'relpath' patterns

    Returns (roots, pats, anypats): the list of roots collected for each
    pattern, the list of (kind, normalized name) pairs, and a flag that is
    True if any pattern is of kind 'glob', 're', 'relglob' or 'relre'.
    """
    pats = []
    roots = []
    anypats = False
    split = [_patsplit(p, default) for p in names]
    for kind, name in split:
        if kind == 'glob':
            name = util.canonpath(canonroot, cwd, name)
            anypats = True
            roots.append(_globprefix(name))
        elif kind == 'relpath':
            name = util.canonpath(canonroot, cwd, name)
            roots.append(name or '.')
        elif kind == 'path':
            name = util.normpath(name)
            roots.append(name or '.')
        elif kind == 'relglob':
            name = util.normpath(name)
            anypats = True
            roots.append('.')
        elif kind in ('re', 'relre'):
            # regexps contribute no root
            anypats = True
        pats.append((kind, name))
    return roots, pats, anypats
198
199 def _matcher(root, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
121 """build a function to match a set of file patterns
200 """build a function to match a set of file patterns
122
201
123 arguments:
202 arguments:
124 canonroot - the canonical root of the tree you're matching against
203 root - the canonical root of the tree you're matching against
125 cwd - the current working directory, if relevant
204 cwd - the current working directory, if relevant
126 names - patterns to find
205 names - patterns to find
127 inc - patterns to include
206 inc - patterns to include
128 exc - patterns to exclude
207 exc - patterns to exclude
129 dflt_pat - if a pattern in names has no explicit type, assume this one
208 dflt_pat - if a pattern in names has no explicit type, assume this one
130
209
131 a pattern is one of:
210 a pattern is one of:
132 'glob:<glob>' - a glob relative to cwd
211 'glob:<glob>' - a glob relative to cwd
133 're:<regexp>' - a regular expression
212 're:<regexp>' - a regular expression
134 'path:<path>' - a path relative to canonroot
213 'path:<path>' - a path relative to canonroot
135 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
214 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
136 'relpath:<path>' - a path relative to cwd
215 'relpath:<path>' - a path relative to cwd
137 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
216 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
138 '<something>' - one of the cases above, selected by the dflt_pat argument
217 '<something>' - one of the cases above, selected by the dflt_pat argument
139
218
140 returns:
219 returns:
141 a 3-tuple containing
220 a 3-tuple containing
142 - list of roots (places where one should start a recursive walk of the fs);
221 - list of roots (places where one should start a recursive walk of the fs);
143 this often matches the explicit non-pattern names passed in, but also
222 this often matches the explicit non-pattern names passed in, but also
144 includes the initial part of glob: patterns that has no glob characters
223 includes the initial part of glob: patterns that has no glob characters
145 - a bool match(filename) function
224 - a bool match(filename) function
146 - a bool indicating if any patterns were passed in
225 - a bool indicating if any patterns were passed in
147 """
226 """
148
227
149 # a common case: no patterns at all
228 # a common case: no patterns at all
150 if not names and not inc and not exc:
229 if not names and not inc and not exc:
151 return [], lambda f: True, False
230 return [], lambda f: True, False
152
231
153 def contains_glob(name):
232 roots, pats, anypats = _normalizepats(names, dflt_pat, root, cwd)
154 for c in name:
155 if c in _globchars: return True
156 return False
157
158 def regex(kind, name, tail):
159 '''convert a pattern into a regular expression'''
160 if not name:
161 return ''
162 if kind == 're':
163 return name
164 elif kind == 'path':
165 return '^' + re.escape(name) + '(?:/|$)'
166 elif kind == 'relglob':
167 return _globre(name, '(?:|.*/)', tail)
168 elif kind == 'relpath':
169 return re.escape(name) + '(?:/|$)'
170 elif kind == 'relre':
171 if name.startswith('^'):
172 return name
173 return '.*' + name
174 return _globre(name, '', tail)
175
176 def matchfn(pats, tail):
177 """build a matching function from a set of patterns"""
178 try:
179 pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
180 if len(pat) > 20000:
181 raise OverflowError()
182 return re.compile(pat).match
183 except OverflowError:
184 # We're using a Python with a tiny regex engine and we
185 # made it explode, so we'll divide the pattern list in two
186 # until it works
187 l = len(pats)
188 if l < 2:
189 raise
190 a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
191 return lambda s: a(s) or b(s)
192 except re.error:
193 for k, p in pats:
194 try:
195 re.compile('(?:%s)' % regex(k, p, tail))
196 except re.error:
197 raise util.Abort("invalid pattern (%s): %s" % (k, p))
198 raise util.Abort("invalid pattern")
199
200 def globprefix(pat):
201 '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
202 root = []
203 for p in pat.split('/'):
204 if contains_glob(p): break
205 root.append(p)
206 return '/'.join(root) or '.'
207
208 def normalizepats(names, default):
209 pats = []
210 roots = []
211 anypats = False
212 for kind, name in [_patsplit(p, default) for p in names]:
213 if kind in ('glob', 'relpath'):
214 name = util.canonpath(canonroot, cwd, name)
215 elif kind in ('relglob', 'path'):
216 name = util.normpath(name)
217
218 pats.append((kind, name))
219
220 if kind in ('glob', 're', 'relglob', 'relre'):
221 anypats = True
222
223 if kind == 'glob':
224 root = globprefix(name)
225 roots.append(root)
226 elif kind in ('relpath', 'path'):
227 roots.append(name or '.')
228 elif kind == 'relglob':
229 roots.append('.')
230 return roots, pats, anypats
231
232 roots, pats, anypats = normalizepats(names, dflt_pat)
233
233
234 if names:
234 if names:
235 patmatch = matchfn(pats, '$')
235 patmatch = _matchfn(pats, '$')
236 if inc:
236 if inc:
237 dummy, inckinds, dummy = normalizepats(inc, 'glob')
237 dummy, inckinds, dummy = _normalizepats(inc, 'glob', root, cwd)
238 incmatch = matchfn(inckinds, '(?:/|$)')
238 incmatch = _matchfn(inckinds, '(?:/|$)')
239 if exc:
239 if exc:
240 dummy, exckinds, dummy = normalizepats(exc, 'glob')
240 dummy, exckinds, dummy = _normalizepats(exc, 'glob', root, cwd)
241 excmatch = matchfn(exckinds, '(?:/|$)')
241 excmatch = _matchfn(exckinds, '(?:/|$)')
242
242
243 if names:
243 if names:
244 if inc:
244 if inc:
245 if exc:
245 if exc:
246 m = lambda f: incmatch(f) and not excmatch(f) and patmatch(f)
246 m = lambda f: incmatch(f) and not excmatch(f) and patmatch(f)
247 else:
247 else:
248 m = lambda f: incmatch(f) and patmatch(f)
248 m = lambda f: incmatch(f) and patmatch(f)
249 else:
249 else:
250 if exc:
250 if exc:
251 m = lambda f: not excmatch(f) and patmatch(f)
251 m = lambda f: not excmatch(f) and patmatch(f)
252 else:
252 else:
253 m = patmatch
253 m = patmatch
254 else:
254 else:
255 if inc:
255 if inc:
256 if exc:
256 if exc:
257 m = lambda f: incmatch(f) and not excmatch(f)
257 m = lambda f: incmatch(f) and not excmatch(f)
258 else:
258 else:
259 m = incmatch
259 m = incmatch
260 else:
260 else:
261 if exc:
261 if exc:
262 m = lambda f: not excmatch(f)
262 m = lambda f: not excmatch(f)
263 else:
263 else:
264 m = lambda f: True
264 m = lambda f: True
265
265
266 return (roots, m, (inc or exc or anypats) and True)
266 return (roots, m, (inc or exc or anypats) and True)
General Comments 0
You need to be logged in to leave comments. Login now