##// END OF EJS Templates
match: remove head and tail args from _globre
Matt Mackall -
r8582:a4c199e1 default
parent child Browse files
Show More
@@ -1,251 +1,251 b''
1 1 # match.py - file name matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 import util, re
9 9
10 10 class _match(object):
11 11 def __init__(self, root, cwd, files, mf, ap):
12 12 self._root = root
13 13 self._cwd = cwd
14 14 self._files = files
15 15 self._fmap = set(files)
16 16 self.matchfn = mf
17 17 self._anypats = ap
18 18 def __call__(self, fn):
19 19 return self.matchfn(fn)
20 20 def __iter__(self):
21 21 for f in self._files:
22 22 yield f
23 23 def bad(self, f, msg):
24 24 return True
25 25 def dir(self, f):
26 26 pass
27 27 def missing(self, f):
28 28 pass
29 29 def exact(self, f):
30 30 return f in self._fmap
31 31 def rel(self, f):
32 32 return util.pathto(self._root, self._cwd, f)
33 33 def files(self):
34 34 return self._files
35 35 def anypats(self):
36 36 return self._anypats
37 37
38 38 class always(_match):
39 39 def __init__(self, root, cwd):
40 40 _match.__init__(self, root, cwd, [], lambda f: True, False)
41 41
42 42 class never(_match):
43 43 def __init__(self, root, cwd):
44 44 _match.__init__(self, root, cwd, [], lambda f: False, False)
45 45
46 46 class exact(_match):
47 47 def __init__(self, root, cwd, files):
48 48 _match.__init__(self, root, cwd, files, self.exact, False)
49 49
50 50 class match(_match):
51 51 def __init__(self, root, cwd, patterns, include=[], exclude=[],
52 52 default='glob'):
53 53 """build an object to match a set of file patterns
54 54
55 55 arguments:
56 56 root - the canonical root of the tree you're matching against
57 57 cwd - the current working directory, if relevant
58 58 patterns - patterns to find
59 59 include - patterns to include
60 60 exclude - patterns to exclude
61 61 default - if a pattern in names has no explicit type, assume this one
62 62
63 63 a pattern is one of:
64 64 'glob:<glob>' - a glob relative to cwd
65 65 're:<regexp>' - a regular expression
66 66 'path:<path>' - a path relative to canonroot
67 67 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
68 68 'relpath:<path>' - a path relative to cwd
69 69 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
70 70 '<something>' - one of the cases above, selected by the dflt_pat argument
71 71 """
72 72
73 73 roots = []
74 74 anypats = bool(include or exclude)
75 75
76 76 if patterns:
77 77 pats = _normalize(patterns, default, root, cwd)
78 78 roots = _roots(pats)
79 79 anypats = anypats or _anypats(pats)
80 80 pm = _buildmatch(pats, '$')
81 81 if include:
82 82 im = _buildmatch(_normalize(include, 'glob', root, cwd), '(?:/|$)')
83 83 if exclude:
84 84 em = _buildmatch(_normalize(exclude, 'glob', root, cwd), '(?:/|$)')
85 85
86 86 if patterns:
87 87 if include:
88 88 if exclude:
89 89 m = lambda f: im(f) and not em(f) and pm(f)
90 90 else:
91 91 m = lambda f: im(f) and pm(f)
92 92 else:
93 93 if exclude:
94 94 m = lambda f: not em(f) and pm(f)
95 95 else:
96 96 m = pm
97 97 else:
98 98 if include:
99 99 if exclude:
100 100 m = lambda f: im(f) and not em(f)
101 101 else:
102 102 m = im
103 103 else:
104 104 if exclude:
105 105 m = lambda f: not em(f)
106 106 else:
107 107 m = lambda f: True
108 108
109 109 _match.__init__(self, root, cwd, roots, m, anypats)
110 110
111 111 def patkind(pat):
112 112 return _patsplit(pat, None)[0]
113 113
114 114 def _patsplit(pat, default):
115 115 """Split a string into an optional pattern kind prefix and the
116 116 actual pattern."""
117 117 if ':' in pat:
118 118 pat, val = pat.split(':', 1)
119 119 if pat in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
120 120 return pat, val
121 121 return default, pat
122 122
123 def _globre(pat, head, tail):
123 def _globre(pat):
124 124 "convert a glob pattern into a regexp"
125 125 i, n = 0, len(pat)
126 126 res = ''
127 127 group = 0
128 128 def peek(): return i < n and pat[i]
129 129 while i < n:
130 130 c = pat[i]
131 131 i = i+1
132 132 if c == '*':
133 133 if peek() == '*':
134 134 i += 1
135 135 res += '.*'
136 136 else:
137 137 res += '[^/]*'
138 138 elif c == '?':
139 139 res += '.'
140 140 elif c == '[':
141 141 j = i
142 142 if j < n and pat[j] in '!]':
143 143 j += 1
144 144 while j < n and pat[j] != ']':
145 145 j += 1
146 146 if j >= n:
147 147 res += '\\['
148 148 else:
149 149 stuff = pat[i:j].replace('\\','\\\\')
150 150 i = j + 1
151 151 if stuff[0] == '!':
152 152 stuff = '^' + stuff[1:]
153 153 elif stuff[0] == '^':
154 154 stuff = '\\' + stuff
155 155 res = '%s[%s]' % (res, stuff)
156 156 elif c == '{':
157 157 group += 1
158 158 res += '(?:'
159 159 elif c == '}' and group:
160 160 res += ')'
161 161 group -= 1
162 162 elif c == ',' and group:
163 163 res += '|'
164 164 elif c == '\\':
165 165 p = peek()
166 166 if p:
167 167 i += 1
168 168 res += re.escape(p)
169 169 else:
170 170 res += re.escape(c)
171 171 else:
172 172 res += re.escape(c)
173 return head + res + tail
173 return res
174 174
175 175 def _regex(kind, name, tail):
176 176 '''convert a pattern into a regular expression'''
177 177 if not name:
178 178 return ''
179 179 if kind == 're':
180 180 return name
181 181 elif kind == 'path':
182 182 return '^' + re.escape(name) + '(?:/|$)'
183 183 elif kind == 'relglob':
184 return _globre(name, '(?:|.*/)', tail)
184 return '(?:|.*/)' + _globre(name) + tail
185 185 elif kind == 'relpath':
186 186 return re.escape(name) + '(?:/|$)'
187 187 elif kind == 'relre':
188 188 if name.startswith('^'):
189 189 return name
190 190 return '.*' + name
191 return _globre(name, '', tail)
191 return _globre(name) + tail
192 192
193 193 def _buildmatch(pats, tail):
194 194 """build a matching function from a set of patterns"""
195 195 try:
196 196 pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
197 197 if len(pat) > 20000:
198 198 raise OverflowError()
199 199 return re.compile(pat).match
200 200 except OverflowError:
201 201 # We're using a Python with a tiny regex engine and we
202 202 # made it explode, so we'll divide the pattern list in two
203 203 # until it works
204 204 l = len(pats)
205 205 if l < 2:
206 206 raise
207 207 a, b = _buildmatch(pats[:l//2], tail), _buildmatch(pats[l//2:], tail)
208 208 return lambda s: a(s) or b(s)
209 209 except re.error:
210 210 for k, p in pats:
211 211 try:
212 212 re.compile('(?:%s)' % _regex(k, p, tail))
213 213 except re.error:
214 214 raise util.Abort("invalid pattern (%s): %s" % (k, p))
215 215 raise util.Abort("invalid pattern")
216 216
217 217 def _globprefix(pat):
218 218 '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
219 219 root = []
220 220 for p in pat.split('/'):
221 221 if '[' in p or '{' in p or '*' in p or '?' in p:
222 222 break
223 223 root.append(p)
224 224 return '/'.join(root) or '.'
225 225
226 226 def _normalize(names, default, root, cwd):
227 227 pats = []
228 228 for kind, name in [_patsplit(p, default) for p in names]:
229 229 if kind in ('glob', 'relpath'):
230 230 name = util.canonpath(root, cwd, name)
231 231 elif kind in ('relglob', 'path'):
232 232 name = util.normpath(name)
233 233
234 234 pats.append((kind, name))
235 235 return pats
236 236
237 237 def _roots(patterns):
238 238 r = []
239 239 for kind, name in patterns:
240 240 if kind == 'glob':
241 241 r.append(_globprefix(name))
242 242 elif kind in ('relpath', 'path'):
243 243 r.append(name or '.')
244 244 elif kind == 'relglob':
245 245 r.append('.')
246 246 return r
247 247
248 248 def _anypats(patterns):
249 249 for kind, name in patterns:
250 250 if kind in ('glob', 're', 'relglob', 'relre'):
251 251 return True
General Comments 0
You need to be logged in to leave comments. Login now