##// END OF EJS Templates
match: tweak some names
Matt Mackall -
r8578:8388ef8d default
parent child Browse files
Show More
@@ -1,261 +1,261 b''
1 # match.py - file name matching
1 # match.py - file name matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 import util, re
8 import util, re
9
9
class _match(object):
    """Base matcher: an explicit file list paired with a match function.

    Instances are callable (the call is forwarded to the match function)
    and iterable (iteration walks the explicit file list).
    """

    def __init__(self, root, cwd, files, mf, ap):
        """Record the matcher state.

        root  - handed to util.pathto() by rel()
        cwd   - handed to util.pathto() by rel()
        files - explicit file names; returned by files() and iterated over
        mf    - callable taking a file name; used by __call__
        ap    - value reported by anypats()
        """
        self._root, self._cwd = root, cwd
        self._files = files
        # set copy of the file list so exact() is a hash lookup
        self._fmap = set(files)
        self.matchfn = mf
        self._anypats = ap

    def __call__(self, fn):
        """Return whatever the match function returns for fn."""
        return self.matchfn(fn)

    def __iter__(self):
        """Yield the explicit file names in their original order."""
        for name in self._files:
            yield name

    def bad(self, f, msg):
        """Hook receiving a file name and a message; always returns True
        in this base class."""
        return True

    def dir(self, f):
        """Hook receiving a name; does nothing in this base class."""
        pass

    def missing(self, f):
        """Hook receiving a name; does nothing in this base class."""
        pass

    def exact(self, f):
        """Return True if f is one of the explicit file names."""
        return f in self._fmap

    def rel(self, f):
        """Return util.pathto(root, cwd, f).

        NOTE(review): presumably f expressed relative to cwd - confirm
        against util.pathto.
        """
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        """Return the explicit file list given at construction."""
        return self._files

    def anypats(self):
        """Return the pattern flag given at construction."""
        return self._anypats
37
37
class always(_match):
    """Matcher with an empty file list whose match function accepts
    every file name."""

    def __init__(self, root, cwd):
        def accept(f):
            return True
        _match.__init__(self, root, cwd, [], accept, False)
41
41
class never(_match):
    """Matcher with an empty file list whose match function rejects
    every file name."""

    def __init__(self, root, cwd):
        def reject(f):
            return False
        _match.__init__(self, root, cwd, [], reject, False)
45
45
class exact(_match):
    """Matcher that accepts exactly the file names listed in files."""

    def __init__(self, root, cwd, files):
        # The exact() method inherited from _match doubles as the match
        # function: it tests membership in the set built from files.
        membership = self.exact
        _match.__init__(self, root, cwd, files, membership, False)
49
49
class match(_match):
    """Matcher built from patterns plus optional include/exclude lists.

    patterns, include, exclude and default are forwarded to _matcher()
    as its names, inc, exc and dflt_pat arguments.
    """

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob'):
        # _matcher returns (roots, match function, pattern flag)
        roots, matchfn, anypats = _matcher(root, cwd, patterns, include,
                                           exclude, default)
        _match.__init__(self, root, cwd, roots, matchfn, anypats)
55
55
def patkind(pat):
    """Return the kind prefix of pat, or None if it carries none."""
    kind, name = _patsplit(pat, None)
    return kind
58
58
def _patsplit(pat, default):
    """Separate an optional 'kind:' prefix from the pattern proper.

    Returns a two-element list [kind, pattern] when pat starts with a
    recognised prefix, otherwise the tuple (default, pat).
    """
    known = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre')
    for kind in known:
        marker = kind + ':'
        if pat.startswith(marker):
            # everything after the first colon is the pattern itself
            return [kind, pat[len(marker):]]
    return default, pat
65
65
def _globre(pat, head, tail):
    """Translate a glob pattern into regular expression source.

    pat  - the glob text
    head - string placed before the translated pattern
    tail - string placed after the translated pattern

    Returns head + translation + tail as a single string.
    """
    out = []
    depth = 0               # number of currently open '{' groups
    pos, end = 0, len(pat)
    while pos < end:
        ch = pat[pos]
        pos += 1
        if ch == '*':
            # '**' may cross '/' boundaries, a lone '*' may not
            if pos < end and pat[pos] == '*':
                pos += 1
                out.append('.*')
            else:
                out.append('[^/]*')
        elif ch == '?':
            out.append('.')
        elif ch == '[':
            # look for the closing bracket; a leading '!' or ']' is
            # part of the set rather than its terminator
            close = pos
            if close < end and pat[close] in '!]':
                close += 1
            while close < end and pat[close] != ']':
                close += 1
            if close >= end:
                # unterminated set: treat the '[' literally
                out.append('\\[')
            else:
                chars = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if chars[0] == '!':
                    chars = '^' + chars[1:]
                elif chars[0] == '^':
                    chars = '\\' + chars
                out.append('[%s]' % chars)
        elif ch == '{':
            depth += 1
            out.append('(?:')
        elif ch == '}' and depth:
            out.append(')')
            depth -= 1
        elif ch == ',' and depth:
            out.append('|')
        elif ch == '\\':
            if pos < end:
                # backslash escapes the following character
                out.append(re.escape(pat[pos]))
                pos += 1
            else:
                out.append(re.escape(ch))
        else:
            out.append(re.escape(ch))
    return head + ''.join(out) + tail
117
117
def _regex(kind, name, tail):
    '''Turn one (kind, name) pattern into regular expression source.

    An empty name yields ''.  Kinds 're' and 'relre' use name as regexp
    text, 'path' and 'relpath' escape it literally, and every other kind
    (including 'relglob') is translated as a glob with tail appended.
    '''
    if not name:
        return ''
    if kind in ('re', 'relre'):
        # 'relre' is unanchored unless the author anchored it already
        if kind == 're' or name.startswith('^'):
            return name
        return '.*' + name
    if kind in ('path', 'relpath'):
        anchor = ''
        if kind == 'path':
            anchor = '^'
        return anchor + re.escape(name) + '(?:/|$)'
    head = ''
    if kind == 'relglob':
        # allow the glob to start at any directory level
        head = '(?:|.*/)'
    return _globre(name, head, tail)
135
135
def _matchfn(pats, tail):
    """Build a matching function from a list of (kind, pattern) pairs.

    pats - sequence of (kind, pattern) tuples
    tail - regexp suffix handed to _regex() for each pattern

    Returns a callable taking a string: either the match method of one
    compiled regexp, or a function trying two sub-matchers in turn when
    the pattern list had to be split.

    Raises util.Abort when a pattern does not compile, and re-raises
    OverflowError when a single pattern is still too large.
    """
    try:
        pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
        if len(pat) > 20000:
            raise OverflowError()
        return re.compile(pat).match
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(pats)
        if count < 2:
            raise
        half = count // 2
        # both halves must recurse through _matchfn; the second call
        # previously used the undefined name 'matchfn'
        a = _matchfn(pats[:half], tail)
        b = _matchfn(pats[half:], tail)
        return lambda s: a(s) or b(s)
    except re.error:
        # recompile one pattern at a time to name the offender
        for k, p in pats:
            try:
                re.compile('(?:%s)' % _regex(k, p, tail))
            except re.error:
                raise util.Abort("invalid pattern (%s): %s" % (k, p))
        raise util.Abort("invalid pattern")
159
159
def _globprefix(pat):
    '''Return the leading path components of pat that contain no glob
    characters, e.g. foo/* -> foo; '.' when there are none.'''
    literal = []
    for segment in pat.split('/'):
        # stop at the first component holding any of [ { * ?
        special = [c for c in '[{*?' if c in segment]
        if special:
            break
        literal.append(segment)
    return '/'.join(literal) or '.'
168
168
def _normalize(names, default, root, cwd):
    """Split each pattern into (kind, name) and normalise the name.

    names   - iterable of pattern strings
    default - kind assumed for patterns without an explicit prefix
    root    - passed to util.canonpath() for 'glob' and 'relpath'
    cwd     - passed to util.canonpath() for 'glob' and 'relpath'

    'relglob' and 'path' names go through util.normpath(); names of any
    other kind are kept unchanged.  Returns a list of (kind, name)
    tuples.
    """
    # split everything first, then normalise
    split = [_patsplit(p, default) for p in names]
    result = []
    for kind, name in split:
        if kind in ('glob', 'relpath'):
            name = util.canonpath(root, cwd, name)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)
        result.append((kind, name))
    return result
179
179
def _roots(patterns):
    """Return one root entry per pattern that has one.

    'glob' contributes its non-glob prefix, 'relpath' and 'path' their
    name (or '.' when empty), 'relglob' contributes '.'; other kinds
    contribute nothing.
    """
    found = []
    for kind, name in patterns:
        if kind == 'glob':
            start = _globprefix(name)
        elif kind in ('relpath', 'path'):
            start = name or '.'
        elif kind == 'relglob':
            start = '.'
        else:
            continue
        found.append(start)
    return found
190
190
def _anypats(patterns):
    """Return True if any (kind, name) pair uses a pattern kind.

    The kinds counted are 'glob', 're', 'relglob' and 'relre'.  Returns
    False (rather than falling through to None) when there is none, so
    callers always get a real bool.
    """
    for kind, name in patterns:
        if kind in ('glob', 're', 'relglob', 'relre'):
            return True
    return False
195
195
def _matcher(root, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
    """build a function to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    names - patterns to find
    inc - patterns to include
    exc - patterns to exclude
    dflt_pat - if a pattern in names has no explicit type, assume this one

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to root
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that doesn't have to match the start of a name
    '<something>' - one of the cases above, selected by the dflt_pat argument

    returns:
    a 3-tuple containing
    - list of roots (places where one should start a recursive walk of the fs);
      this often matches the explicit non-pattern names passed in, but also
      includes the initial part of glob: patterns that has no glob characters
    - a match(filename) function
    - a flag that is true if any patterns were passed in
    """

    roots = []
    anypats = bool(inc or exc)

    # build one sub-matcher per non-empty pattern list
    if names:
        pats = _normalize(names, dflt_pat, root, cwd)
        roots = _roots(pats)
        anypats = anypats or _anypats(pats)
        patmatch = _matchfn(pats, '$')
    if inc:
        incmatch = _matchfn(_normalize(inc, 'glob', root, cwd), '(?:/|$)')
    if exc:
        excmatch = _matchfn(_normalize(exc, 'glob', root, cwd), '(?:/|$)')

    # combine only the sub-matchers that exist; includes are tested
    # first, then excludes, then the named patterns
    if names and inc and exc:
        m = lambda f: incmatch(f) and not excmatch(f) and patmatch(f)
    elif names and inc:
        m = lambda f: incmatch(f) and patmatch(f)
    elif names and exc:
        m = lambda f: not excmatch(f) and patmatch(f)
    elif names:
        m = patmatch
    elif inc and exc:
        m = lambda f: incmatch(f) and not excmatch(f)
    elif inc:
        m = incmatch
    elif exc:
        m = lambda f: not excmatch(f)
    else:
        m = lambda f: True

    return (roots, m, anypats)
General Comments 0
You need to be logged in to leave comments. Login now