##// END OF EJS Templates
match: optimize escaping in _globre...
Matt Mackall -
r8583:19d1b2ae default
parent child Browse files
Show More
@@ -1,251 +1,254 b''
1 1 # match.py - file name matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 import util, re
9 9
10 10 class _match(object):
11 11 def __init__(self, root, cwd, files, mf, ap):
12 12 self._root = root
13 13 self._cwd = cwd
14 14 self._files = files
15 15 self._fmap = set(files)
16 16 self.matchfn = mf
17 17 self._anypats = ap
18 18 def __call__(self, fn):
19 19 return self.matchfn(fn)
20 20 def __iter__(self):
21 21 for f in self._files:
22 22 yield f
23 23 def bad(self, f, msg):
24 24 return True
25 25 def dir(self, f):
26 26 pass
27 27 def missing(self, f):
28 28 pass
29 29 def exact(self, f):
30 30 return f in self._fmap
31 31 def rel(self, f):
32 32 return util.pathto(self._root, self._cwd, f)
33 33 def files(self):
34 34 return self._files
35 35 def anypats(self):
36 36 return self._anypats
37 37
38 38 class always(_match):
39 39 def __init__(self, root, cwd):
40 40 _match.__init__(self, root, cwd, [], lambda f: True, False)
41 41
42 42 class never(_match):
43 43 def __init__(self, root, cwd):
44 44 _match.__init__(self, root, cwd, [], lambda f: False, False)
45 45
46 46 class exact(_match):
47 47 def __init__(self, root, cwd, files):
48 48 _match.__init__(self, root, cwd, files, self.exact, False)
49 49
50 50 class match(_match):
51 51 def __init__(self, root, cwd, patterns, include=[], exclude=[],
52 52 default='glob'):
53 53 """build an object to match a set of file patterns
54 54
55 55 arguments:
56 56 root - the canonical root of the tree you're matching against
57 57 cwd - the current working directory, if relevant
58 58 patterns - patterns to find
59 59 include - patterns to include
60 60 exclude - patterns to exclude
61 61 default - if a pattern in names has no explicit type, assume this one
62 62
63 63 a pattern is one of:
64 64 'glob:<glob>' - a glob relative to cwd
65 65 're:<regexp>' - a regular expression
66 66 'path:<path>' - a path relative to canonroot
67 67 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
68 68 'relpath:<path>' - a path relative to cwd
69 69 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
70 70 '<something>' - one of the cases above, selected by the dflt_pat argument
71 71 """
72 72
73 73 roots = []
74 74 anypats = bool(include or exclude)
75 75
76 76 if patterns:
77 77 pats = _normalize(patterns, default, root, cwd)
78 78 roots = _roots(pats)
79 79 anypats = anypats or _anypats(pats)
80 80 pm = _buildmatch(pats, '$')
81 81 if include:
82 82 im = _buildmatch(_normalize(include, 'glob', root, cwd), '(?:/|$)')
83 83 if exclude:
84 84 em = _buildmatch(_normalize(exclude, 'glob', root, cwd), '(?:/|$)')
85 85
86 86 if patterns:
87 87 if include:
88 88 if exclude:
89 89 m = lambda f: im(f) and not em(f) and pm(f)
90 90 else:
91 91 m = lambda f: im(f) and pm(f)
92 92 else:
93 93 if exclude:
94 94 m = lambda f: not em(f) and pm(f)
95 95 else:
96 96 m = pm
97 97 else:
98 98 if include:
99 99 if exclude:
100 100 m = lambda f: im(f) and not em(f)
101 101 else:
102 102 m = im
103 103 else:
104 104 if exclude:
105 105 m = lambda f: not em(f)
106 106 else:
107 107 m = lambda f: True
108 108
109 109 _match.__init__(self, root, cwd, roots, m, anypats)
110 110
111 111 def patkind(pat):
112 112 return _patsplit(pat, None)[0]
113 113
114 114 def _patsplit(pat, default):
115 115 """Split a string into an optional pattern kind prefix and the
116 116 actual pattern."""
117 117 if ':' in pat:
118 118 pat, val = pat.split(':', 1)
119 119 if pat in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
120 120 return pat, val
121 121 return default, pat
122 122
123 123 def _globre(pat):
124 124 "convert a glob pattern into a regexp"
125 125 i, n = 0, len(pat)
126 126 res = ''
127 127 group = 0
128 escape = re.escape
128 129 def peek(): return i < n and pat[i]
129 130 while i < n:
130 131 c = pat[i]
131 132 i = i+1
132 if c == '*':
133 if c not in '*?[{},\\':
134 res += escape(c)
135 elif c == '*':
133 136 if peek() == '*':
134 137 i += 1
135 138 res += '.*'
136 139 else:
137 140 res += '[^/]*'
138 141 elif c == '?':
139 142 res += '.'
140 143 elif c == '[':
141 144 j = i
142 145 if j < n and pat[j] in '!]':
143 146 j += 1
144 147 while j < n and pat[j] != ']':
145 148 j += 1
146 149 if j >= n:
147 150 res += '\\['
148 151 else:
149 152 stuff = pat[i:j].replace('\\','\\\\')
150 153 i = j + 1
151 154 if stuff[0] == '!':
152 155 stuff = '^' + stuff[1:]
153 156 elif stuff[0] == '^':
154 157 stuff = '\\' + stuff
155 158 res = '%s[%s]' % (res, stuff)
156 159 elif c == '{':
157 160 group += 1
158 161 res += '(?:'
159 162 elif c == '}' and group:
160 163 res += ')'
161 164 group -= 1
162 165 elif c == ',' and group:
163 166 res += '|'
164 167 elif c == '\\':
165 168 p = peek()
166 169 if p:
167 170 i += 1
168 res += re.escape(p)
171 res += escape(p)
169 172 else:
170 res += re.escape(c)
173 res += escape(c)
171 174 else:
172 res += re.escape(c)
175 res += escape(c)
173 176 return res
174 177
175 178 def _regex(kind, name, tail):
176 179 '''convert a pattern into a regular expression'''
177 180 if not name:
178 181 return ''
179 182 if kind == 're':
180 183 return name
181 184 elif kind == 'path':
182 185 return '^' + re.escape(name) + '(?:/|$)'
183 186 elif kind == 'relglob':
184 187 return '(?:|.*/)' + _globre(name) + tail
185 188 elif kind == 'relpath':
186 189 return re.escape(name) + '(?:/|$)'
187 190 elif kind == 'relre':
188 191 if name.startswith('^'):
189 192 return name
190 193 return '.*' + name
191 194 return _globre(name) + tail
192 195
193 196 def _buildmatch(pats, tail):
194 197 """build a matching function from a set of patterns"""
195 198 try:
196 199 pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
197 200 if len(pat) > 20000:
198 201 raise OverflowError()
199 202 return re.compile(pat).match
200 203 except OverflowError:
201 204 # We're using a Python with a tiny regex engine and we
202 205 # made it explode, so we'll divide the pattern list in two
203 206 # until it works
204 207 l = len(pats)
205 208 if l < 2:
206 209 raise
207 210 a, b = _buildmatch(pats[:l//2], tail), _buildmatch(pats[l//2:], tail)
208 211 return lambda s: a(s) or b(s)
209 212 except re.error:
210 213 for k, p in pats:
211 214 try:
212 215 re.compile('(?:%s)' % _regex(k, p, tail))
213 216 except re.error:
214 217 raise util.Abort("invalid pattern (%s): %s" % (k, p))
215 218 raise util.Abort("invalid pattern")
216 219
217 220 def _globprefix(pat):
218 221 '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
219 222 root = []
220 223 for p in pat.split('/'):
221 224 if '[' in p or '{' in p or '*' in p or '?' in p:
222 225 break
223 226 root.append(p)
224 227 return '/'.join(root) or '.'
225 228
226 229 def _normalize(names, default, root, cwd):
227 230 pats = []
228 231 for kind, name in [_patsplit(p, default) for p in names]:
229 232 if kind in ('glob', 'relpath'):
230 233 name = util.canonpath(root, cwd, name)
231 234 elif kind in ('relglob', 'path'):
232 235 name = util.normpath(name)
233 236
234 237 pats.append((kind, name))
235 238 return pats
236 239
237 240 def _roots(patterns):
238 241 r = []
239 242 for kind, name in patterns:
240 243 if kind == 'glob':
241 244 r.append(_globprefix(name))
242 245 elif kind in ('relpath', 'path'):
243 246 r.append(name or '.')
244 247 elif kind == 'relglob':
245 248 r.append('.')
246 249 return r
247 250
248 251 def _anypats(patterns):
249 252 for kind, name in patterns:
250 253 if kind in ('glob', 're', 'relglob', 'relre'):
251 254 return True
General Comments 0
You need to be logged in to leave comments. Login now