##// END OF EJS Templates
match: optimize _patsplit
Matt Mackall -
r8579:aff7f83c default
parent child Browse files
Show More
@@ -1,261 +1,263 b''
1 1 # match.py - file name matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 import util, re
9 9
10 10 class _match(object):
11 11 def __init__(self, root, cwd, files, mf, ap):
12 12 self._root = root
13 13 self._cwd = cwd
14 14 self._files = files
15 15 self._fmap = set(files)
16 16 self.matchfn = mf
17 17 self._anypats = ap
18 18 def __call__(self, fn):
19 19 return self.matchfn(fn)
20 20 def __iter__(self):
21 21 for f in self._files:
22 22 yield f
23 23 def bad(self, f, msg):
24 24 return True
25 25 def dir(self, f):
26 26 pass
27 27 def missing(self, f):
28 28 pass
29 29 def exact(self, f):
30 30 return f in self._fmap
31 31 def rel(self, f):
32 32 return util.pathto(self._root, self._cwd, f)
33 33 def files(self):
34 34 return self._files
35 35 def anypats(self):
36 36 return self._anypats
37 37
38 38 class always(_match):
39 39 def __init__(self, root, cwd):
40 40 _match.__init__(self, root, cwd, [], lambda f: True, False)
41 41
42 42 class never(_match):
43 43 def __init__(self, root, cwd):
44 44 _match.__init__(self, root, cwd, [], lambda f: False, False)
45 45
46 46 class exact(_match):
47 47 def __init__(self, root, cwd, files):
48 48 _match.__init__(self, root, cwd, files, self.exact, False)
49 49
50 50 class match(_match):
51 51 def __init__(self, root, cwd, patterns, include=[], exclude=[],
52 52 default='glob'):
53 53 f, mf, ap = _matcher(root, cwd, patterns, include, exclude, default)
54 54 _match.__init__(self, root, cwd, f, mf, ap)
55 55
56 56 def patkind(pat):
57 57 return _patsplit(pat, None)[0]
58 58
59 59 def _patsplit(pat, default):
60 60 """Split a string into an optional pattern kind prefix and the
61 61 actual pattern."""
62 for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
63 if pat.startswith(prefix + ':'): return pat.split(':', 1)
62 if ':' in pat:
63 pat, val = pat.split(':', 1)
64 if pat in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
65 return pat, val
64 66 return default, pat
65 67
66 68 def _globre(pat, head, tail):
67 69 "convert a glob pattern into a regexp"
68 70 i, n = 0, len(pat)
69 71 res = ''
70 72 group = 0
71 73 def peek(): return i < n and pat[i]
72 74 while i < n:
73 75 c = pat[i]
74 76 i = i+1
75 77 if c == '*':
76 78 if peek() == '*':
77 79 i += 1
78 80 res += '.*'
79 81 else:
80 82 res += '[^/]*'
81 83 elif c == '?':
82 84 res += '.'
83 85 elif c == '[':
84 86 j = i
85 87 if j < n and pat[j] in '!]':
86 88 j += 1
87 89 while j < n and pat[j] != ']':
88 90 j += 1
89 91 if j >= n:
90 92 res += '\\['
91 93 else:
92 94 stuff = pat[i:j].replace('\\','\\\\')
93 95 i = j + 1
94 96 if stuff[0] == '!':
95 97 stuff = '^' + stuff[1:]
96 98 elif stuff[0] == '^':
97 99 stuff = '\\' + stuff
98 100 res = '%s[%s]' % (res, stuff)
99 101 elif c == '{':
100 102 group += 1
101 103 res += '(?:'
102 104 elif c == '}' and group:
103 105 res += ')'
104 106 group -= 1
105 107 elif c == ',' and group:
106 108 res += '|'
107 109 elif c == '\\':
108 110 p = peek()
109 111 if p:
110 112 i += 1
111 113 res += re.escape(p)
112 114 else:
113 115 res += re.escape(c)
114 116 else:
115 117 res += re.escape(c)
116 118 return head + res + tail
117 119
118 120 def _regex(kind, name, tail):
119 121 '''convert a pattern into a regular expression'''
120 122 if not name:
121 123 return ''
122 124 if kind == 're':
123 125 return name
124 126 elif kind == 'path':
125 127 return '^' + re.escape(name) + '(?:/|$)'
126 128 elif kind == 'relglob':
127 129 return _globre(name, '(?:|.*/)', tail)
128 130 elif kind == 'relpath':
129 131 return re.escape(name) + '(?:/|$)'
130 132 elif kind == 'relre':
131 133 if name.startswith('^'):
132 134 return name
133 135 return '.*' + name
134 136 return _globre(name, '', tail)
135 137
136 138 def _matchfn(pats, tail):
137 139 """build a matching function from a set of patterns"""
138 140 try:
139 141 pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
140 142 if len(pat) > 20000:
141 143 raise OverflowError()
142 144 return re.compile(pat).match
143 145 except OverflowError:
144 146 # We're using a Python with a tiny regex engine and we
145 147 # made it explode, so we'll divide the pattern list in two
146 148 # until it works
147 149 l = len(pats)
148 150 if l < 2:
149 151 raise
150 152 a, b = _matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
151 153 return lambda s: a(s) or b(s)
152 154 except re.error:
153 155 for k, p in pats:
154 156 try:
155 157 re.compile('(?:%s)' % _regex(k, p, tail))
156 158 except re.error:
157 159 raise util.Abort("invalid pattern (%s): %s" % (k, p))
158 160 raise util.Abort("invalid pattern")
159 161
160 162 def _globprefix(pat):
161 163 '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
162 164 root = []
163 165 for p in pat.split('/'):
164 166 if '[' in p or '{' in p or '*' in p or '?' in p:
165 167 break
166 168 root.append(p)
167 169 return '/'.join(root) or '.'
168 170
169 171 def _normalize(names, default, root, cwd):
170 172 pats = []
171 173 for kind, name in [_patsplit(p, default) for p in names]:
172 174 if kind in ('glob', 'relpath'):
173 175 name = util.canonpath(root, cwd, name)
174 176 elif kind in ('relglob', 'path'):
175 177 name = util.normpath(name)
176 178
177 179 pats.append((kind, name))
178 180 return pats
179 181
180 182 def _roots(patterns):
181 183 r = []
182 184 for kind, name in patterns:
183 185 if kind == 'glob':
184 186 r.append(_globprefix(name))
185 187 elif kind in ('relpath', 'path'):
186 188 r.append(name or '.')
187 189 elif kind == 'relglob':
188 190 r.append('.')
189 191 return r
190 192
191 193 def _anypats(patterns):
192 194 for kind, name in patterns:
193 195 if kind in ('glob', 're', 'relglob', 'relre'):
194 196 return True
195 197
196 198 def _matcher(root, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
197 199 """build a function to match a set of file patterns
198 200
199 201 arguments:
200 202 root - the canonical root of the tree you're matching against
201 203 cwd - the current working directory, if relevant
202 204 names - patterns to find
203 205 inc - patterns to include
204 206 exc - patterns to exclude
205 207 dflt_pat - if a pattern in names has no explicit type, assume this one
206 208
207 209 a pattern is one of:
208 210 'glob:<glob>' - a glob relative to cwd
209 211 're:<regexp>' - a regular expression
210 212 'path:<path>' - a path relative to canonroot
211 213 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
212 214 'relpath:<path>' - a path relative to cwd
213 215 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
214 216 '<something>' - one of the cases above, selected by the dflt_pat argument
215 217
216 218 returns:
217 219 a 3-tuple containing
218 220 - list of roots (places where one should start a recursive walk of the fs);
219 221 this often matches the explicit non-pattern names passed in, but also
220 222 includes the initial part of glob: patterns that has no glob characters
221 223 - a bool match(filename) function
222 224 - a bool indicating if any patterns were passed in
223 225 """
224 226
225 227 roots = []
226 228 anypats = bool(inc or exc)
227 229
228 230 if names:
229 231 pats = _normalize(names, dflt_pat, root, cwd)
230 232 roots = _roots(pats)
231 233 anypats = anypats or _anypats(pats)
232 234 patmatch = _matchfn(pats, '$')
233 235 if inc:
234 236 incmatch = _matchfn(_normalize(inc, 'glob', root, cwd), '(?:/|$)')
235 237 if exc:
236 238 excmatch = _matchfn(_normalize(exc, 'glob', root, cwd), '(?:/|$)')
237 239
238 240 if names:
239 241 if inc:
240 242 if exc:
241 243 m = lambda f: incmatch(f) and not excmatch(f) and patmatch(f)
242 244 else:
243 245 m = lambda f: incmatch(f) and patmatch(f)
244 246 else:
245 247 if exc:
246 248 m = lambda f: not excmatch(f) and patmatch(f)
247 249 else:
248 250 m = patmatch
249 251 else:
250 252 if inc:
251 253 if exc:
252 254 m = lambda f: incmatch(f) and not excmatch(f)
253 255 else:
254 256 m = incmatch
255 257 else:
256 258 if exc:
257 259 m = lambda f: not excmatch(f)
258 260 else:
259 261 m = lambda f: True
260 262
261 263 return (roots, m, anypats)
General Comments 0
You need to be logged in to leave comments. Login now