##// END OF EJS Templates
match: add exact flag to match() to unify all match forms
Matt Mackall -
r8586:347fe1ac default
parent child Browse files
Show More
@@ -1,250 +1,254 b''
1 # match.py - file name matching
1 # match.py - file name matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 import util, re
8 import util, re
9
9
10 class _match(object):
10 class _match(object):
11 def __init__(self, root, cwd, files, mf, ap):
11 def __init__(self, root, cwd, files, mf, ap):
12 self._root = root
12 self._root = root
13 self._cwd = cwd
13 self._cwd = cwd
14 self._files = files
14 self._files = files
15 self._fmap = set(files)
15 self._fmap = set(files)
16 self.matchfn = mf
16 self.matchfn = mf
17 self._anypats = ap
17 self._anypats = ap
18 def __call__(self, fn):
18 def __call__(self, fn):
19 return self.matchfn(fn)
19 return self.matchfn(fn)
20 def __iter__(self):
20 def __iter__(self):
21 for f in self._files:
21 for f in self._files:
22 yield f
22 yield f
23 def bad(self, f, msg):
23 def bad(self, f, msg):
24 return True
24 return True
25 def dir(self, f):
25 def dir(self, f):
26 pass
26 pass
27 def missing(self, f):
27 def missing(self, f):
28 pass
28 pass
29 def exact(self, f):
29 def exact(self, f):
30 return f in self._fmap
30 return f in self._fmap
31 def rel(self, f):
31 def rel(self, f):
32 return util.pathto(self._root, self._cwd, f)
32 return util.pathto(self._root, self._cwd, f)
33 def files(self):
33 def files(self):
34 return self._files
34 return self._files
35 def anypats(self):
35 def anypats(self):
36 return self._anypats
36 return self._anypats
37
37
38 class match(_match):
38 class match(_match):
39 def __init__(self, root, cwd, patterns, include=[], exclude=[],
39 def __init__(self, root, cwd, patterns, include=[], exclude=[],
40 default='glob'):
40 default='glob', exact=False):
41 """build an object to match a set of file patterns
41 """build an object to match a set of file patterns
42
42
43 arguments:
43 arguments:
44 root - the canonical root of the tree you're matching against
44 root - the canonical root of the tree you're matching against
45 cwd - the current working directory, if relevant
45 cwd - the current working directory, if relevant
46 patterns - patterns to find
46 patterns - patterns to find
47 include - patterns to include
47 include - patterns to include
48 exclude - patterns to exclude
48 exclude - patterns to exclude
49 default - if a pattern in patterns has no explicit type, assume this one
49 default - if a pattern in patterns has no explicit type, assume this one
50 exact - patterns are actually literals
50
51
51 a pattern is one of:
52 a pattern is one of:
52 'glob:<glob>' - a glob relative to cwd
53 'glob:<glob>' - a glob relative to cwd
53 're:<regexp>' - a regular expression
54 're:<regexp>' - a regular expression
54 'path:<path>' - a path relative to canonroot
55 'path:<path>' - a path relative to canonroot
55 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
56 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
56 'relpath:<path>' - a path relative to cwd
57 'relpath:<path>' - a path relative to cwd
57 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
58 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
58 '<something>' - one of the cases above, selected by the default argument
59 '<something>' - one of the cases above, selected by the default argument
59 """
60 """
60
61
61 roots = []
62 roots = []
62 anypats = bool(include or exclude)
63 anypats = bool(include or exclude)
63
64
64 if patterns:
65 if include:
66 im = _buildmatch(_normalize(include, 'glob', root, cwd), '(?:/|$)')
67 if exclude:
68 em = _buildmatch(_normalize(exclude, 'glob', root, cwd), '(?:/|$)')
69 if exact:
70 roots = patterns
71 pm = self.exact
72 elif patterns:
65 pats = _normalize(patterns, default, root, cwd)
73 pats = _normalize(patterns, default, root, cwd)
66 roots = _roots(pats)
74 roots = _roots(pats)
67 anypats = anypats or _anypats(pats)
75 anypats = anypats or _anypats(pats)
68 pm = _buildmatch(pats, '$')
76 pm = _buildmatch(pats, '$')
69 if include:
70 im = _buildmatch(_normalize(include, 'glob', root, cwd), '(?:/|$)')
71 if exclude:
72 em = _buildmatch(_normalize(exclude, 'glob', root, cwd), '(?:/|$)')
73
77
74 if patterns:
78 if patterns or exact:
75 if include:
79 if include:
76 if exclude:
80 if exclude:
77 m = lambda f: im(f) and not em(f) and pm(f)
81 m = lambda f: im(f) and not em(f) and pm(f)
78 else:
82 else:
79 m = lambda f: im(f) and pm(f)
83 m = lambda f: im(f) and pm(f)
80 else:
84 else:
81 if exclude:
85 if exclude:
82 m = lambda f: not em(f) and pm(f)
86 m = lambda f: not em(f) and pm(f)
83 else:
87 else:
84 m = pm
88 m = pm
85 else:
89 else:
86 if include:
90 if include:
87 if exclude:
91 if exclude:
88 m = lambda f: im(f) and not em(f)
92 m = lambda f: im(f) and not em(f)
89 else:
93 else:
90 m = im
94 m = im
91 else:
95 else:
92 if exclude:
96 if exclude:
93 m = lambda f: not em(f)
97 m = lambda f: not em(f)
94 else:
98 else:
95 m = lambda f: True
99 m = lambda f: True
96
100
97 _match.__init__(self, root, cwd, roots, m, anypats)
101 _match.__init__(self, root, cwd, roots, m, anypats)
98
102
99 class exact(_match):
103 class exact(match):
100 def __init__(self, root, cwd, files):
104 def __init__(self, root, cwd, files):
101 _match.__init__(self, root, cwd, files, self.exact, False)
105 match.__init__(self, root, cwd, files, exact = True)
102
106
103 class always(match):
107 class always(match):
104 def __init__(self, root, cwd):
108 def __init__(self, root, cwd):
105 match.__init__(self, root, cwd, [])
109 match.__init__(self, root, cwd, [])
106
110
107 class never(exact):
111 class never(match):
108 def __init__(self, root, cwd):
112 def __init__(self, root, cwd):
109 exact.__init__(self, root, cwd, [])
113 match.__init__(self, root, cwd, [], exact = True)
110
114
111 def patkind(pat):
115 def patkind(pat):
112 return _patsplit(pat, None)[0]
116 return _patsplit(pat, None)[0]
113
117
114 def _patsplit(pat, default):
118 def _patsplit(pat, default):
115 """Split a string into an optional pattern kind prefix and the
119 """Split a string into an optional pattern kind prefix and the
116 actual pattern."""
120 actual pattern."""
117 if ':' in pat:
121 if ':' in pat:
118 pat, val = pat.split(':', 1)
122 pat, val = pat.split(':', 1)
119 if pat in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
123 if pat in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
120 return pat, val
124 return pat, val
121 return default, pat
125 return default, pat
122
126
123 def _globre(pat):
127 def _globre(pat):
124 "convert a glob pattern into a regexp"
128 "convert a glob pattern into a regexp"
125 i, n = 0, len(pat)
129 i, n = 0, len(pat)
126 res = ''
130 res = ''
127 group = 0
131 group = 0
128 escape = re.escape
132 escape = re.escape
129 def peek(): return i < n and pat[i]
133 def peek(): return i < n and pat[i]
130 while i < n:
134 while i < n:
131 c = pat[i]
135 c = pat[i]
132 i = i+1
136 i = i+1
133 if c not in '*?[{},\\':
137 if c not in '*?[{},\\':
134 res += escape(c)
138 res += escape(c)
135 elif c == '*':
139 elif c == '*':
136 if peek() == '*':
140 if peek() == '*':
137 i += 1
141 i += 1
138 res += '.*'
142 res += '.*'
139 else:
143 else:
140 res += '[^/]*'
144 res += '[^/]*'
141 elif c == '?':
145 elif c == '?':
142 res += '.'
146 res += '.'
143 elif c == '[':
147 elif c == '[':
144 j = i
148 j = i
145 if j < n and pat[j] in '!]':
149 if j < n and pat[j] in '!]':
146 j += 1
150 j += 1
147 while j < n and pat[j] != ']':
151 while j < n and pat[j] != ']':
148 j += 1
152 j += 1
149 if j >= n:
153 if j >= n:
150 res += '\\['
154 res += '\\['
151 else:
155 else:
152 stuff = pat[i:j].replace('\\','\\\\')
156 stuff = pat[i:j].replace('\\','\\\\')
153 i = j + 1
157 i = j + 1
154 if stuff[0] == '!':
158 if stuff[0] == '!':
155 stuff = '^' + stuff[1:]
159 stuff = '^' + stuff[1:]
156 elif stuff[0] == '^':
160 elif stuff[0] == '^':
157 stuff = '\\' + stuff
161 stuff = '\\' + stuff
158 res = '%s[%s]' % (res, stuff)
162 res = '%s[%s]' % (res, stuff)
159 elif c == '{':
163 elif c == '{':
160 group += 1
164 group += 1
161 res += '(?:'
165 res += '(?:'
162 elif c == '}' and group:
166 elif c == '}' and group:
163 res += ')'
167 res += ')'
164 group -= 1
168 group -= 1
165 elif c == ',' and group:
169 elif c == ',' and group:
166 res += '|'
170 res += '|'
167 elif c == '\\':
171 elif c == '\\':
168 p = peek()
172 p = peek()
169 if p:
173 if p:
170 i += 1
174 i += 1
171 res += escape(p)
175 res += escape(p)
172 else:
176 else:
173 res += escape(c)
177 res += escape(c)
174 else:
178 else:
175 res += escape(c)
179 res += escape(c)
176 return res
180 return res
177
181
178 def _regex(kind, name, tail):
182 def _regex(kind, name, tail):
179 '''convert a pattern into a regular expression'''
183 '''convert a pattern into a regular expression'''
180 if not name:
184 if not name:
181 return ''
185 return ''
182 if kind == 're':
186 if kind == 're':
183 return name
187 return name
184 elif kind == 'path':
188 elif kind == 'path':
185 return '^' + re.escape(name) + '(?:/|$)'
189 return '^' + re.escape(name) + '(?:/|$)'
186 elif kind == 'relglob':
190 elif kind == 'relglob':
187 return '(?:|.*/)' + _globre(name) + tail
191 return '(?:|.*/)' + _globre(name) + tail
188 elif kind == 'relpath':
192 elif kind == 'relpath':
189 return re.escape(name) + '(?:/|$)'
193 return re.escape(name) + '(?:/|$)'
190 elif kind == 'relre':
194 elif kind == 'relre':
191 if name.startswith('^'):
195 if name.startswith('^'):
192 return name
196 return name
193 return '.*' + name
197 return '.*' + name
194 return _globre(name) + tail
198 return _globre(name) + tail
195
199
196 def _buildmatch(pats, tail):
200 def _buildmatch(pats, tail):
197 """build a matching function from a set of patterns"""
201 """build a matching function from a set of patterns"""
198 try:
202 try:
199 pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
203 pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
200 if len(pat) > 20000:
204 if len(pat) > 20000:
201 raise OverflowError()
205 raise OverflowError()
202 return re.compile(pat).match
206 return re.compile(pat).match
203 except OverflowError:
207 except OverflowError:
204 # We're using a Python with a tiny regex engine and we
208 # We're using a Python with a tiny regex engine and we
205 # made it explode, so we'll divide the pattern list in two
209 # made it explode, so we'll divide the pattern list in two
206 # until it works
210 # until it works
207 l = len(pats)
211 l = len(pats)
208 if l < 2:
212 if l < 2:
209 raise
213 raise
210 a, b = _buildmatch(pats[:l//2], tail), _buildmatch(pats[l//2:], tail)
214 a, b = _buildmatch(pats[:l//2], tail), _buildmatch(pats[l//2:], tail)
211 return lambda s: a(s) or b(s)
215 return lambda s: a(s) or b(s)
212 except re.error:
216 except re.error:
213 for k, p in pats:
217 for k, p in pats:
214 try:
218 try:
215 re.compile('(?:%s)' % _regex(k, p, tail))
219 re.compile('(?:%s)' % _regex(k, p, tail))
216 except re.error:
220 except re.error:
217 raise util.Abort("invalid pattern (%s): %s" % (k, p))
221 raise util.Abort("invalid pattern (%s): %s" % (k, p))
218 raise util.Abort("invalid pattern")
222 raise util.Abort("invalid pattern")
219
223
220 def _normalize(names, default, root, cwd):
224 def _normalize(names, default, root, cwd):
221 pats = []
225 pats = []
222 for kind, name in [_patsplit(p, default) for p in names]:
226 for kind, name in [_patsplit(p, default) for p in names]:
223 if kind in ('glob', 'relpath'):
227 if kind in ('glob', 'relpath'):
224 name = util.canonpath(root, cwd, name)
228 name = util.canonpath(root, cwd, name)
225 elif kind in ('relglob', 'path'):
229 elif kind in ('relglob', 'path'):
226 name = util.normpath(name)
230 name = util.normpath(name)
227
231
228 pats.append((kind, name))
232 pats.append((kind, name))
229 return pats
233 return pats
230
234
231 def _roots(patterns):
235 def _roots(patterns):
232 r = []
236 r = []
233 for kind, name in patterns:
237 for kind, name in patterns:
234 if kind == 'glob': # find the non-glob prefix
238 if kind == 'glob': # find the non-glob prefix
235 root = []
239 root = []
236 for p in name.split('/'):
240 for p in name.split('/'):
237 if '[' in p or '{' in p or '*' in p or '?' in p:
241 if '[' in p or '{' in p or '*' in p or '?' in p:
238 break
242 break
239 root.append(p)
243 root.append(p)
240 r.append('/'.join(root) or '.')
244 r.append('/'.join(root) or '.')
241 elif kind in ('relpath', 'path'):
245 elif kind in ('relpath', 'path'):
242 r.append(name or '.')
246 r.append(name or '.')
243 elif kind == 'relglob':
247 elif kind == 'relglob':
244 r.append('.')
248 r.append('.')
245 return r
249 return r
246
250
247 def _anypats(patterns):
251 def _anypats(patterns):
248 for kind, name in patterns:
252 for kind, name in patterns:
249 if kind in ('glob', 're', 'relglob', 'relre'):
253 if kind in ('glob', 're', 'relglob', 'relre'):
250 return True
254 return True
General Comments 0
You need to be logged in to leave comments. Login now