##// END OF EJS Templates
match: move util match functions over
Matt Mackall -
r8570:7fe2012b default
parent child Browse files
Show More
@@ -5,7 +5,7
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 import util
8 import util, re
9 9
10 10 class _match(object):
11 11 def __init__(self, root, cwd, files, mf, ap):
@@ -50,10 +50,203 class exact(_match):
class match(_match):
    """Matcher built from user-supplied patterns plus include/exclude lists."""

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob'):
        """Compile the patterns and initialise the base matcher.

        root, cwd - handed to _matcher() as canonroot and cwd
        patterns  - patterns to find
        include   - patterns to include
        exclude   - patterns to exclude
        default   - kind assumed for entries of patterns without a prefix

        The include/exclude default lists are only read, never mutated.
        """
        # _matcher() returns (roots, match function, anypats flag)
        f, mf, ap = _matcher(root, cwd, patterns, include, exclude, default)
        _match.__init__(self, root, cwd, f, mf, ap)
56 55
def patkind(pat):
    """Return the kind prefix of pat ('glob', 're', ...), or None if the
    pattern carries no recognised prefix."""
    return _patsplit(pat, None)[0]
58
def _patsplit(pat, default):
    """Split a string into an optional pattern kind prefix and the
    actual pattern.

    Returns a (kind, pattern) tuple.  kind is one of 're', 'glob', 'path',
    'relglob', 'relpath' or 'relre' when pat starts with '<kind>:';
    otherwise kind is default and pattern is pat unchanged.
    """
    if ':' in pat:
        # only the first colon separates the kind from the pattern
        kind, name = pat.split(':', 1)
        if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
            return kind, name
    return default, pat
65
# characters that mark a name as a glob; contains_glob() in _matcher()
# tests each character of a path component against this set
66 _globchars = set('[{*?')
67
def _globre(pat, head='^', tail='$'):
    """convert a glob pattern into a regexp

    Returns head + <translated pat> + tail as a string.  Translation:
      *      -> [^/]*   (anything except a slash)
      **     -> .*      (anything, slashes included)
      ?      -> .
      [...]  -> a character class; a leading '!' negates it and a
                leading '^' is taken literally.  An unterminated '['
                is matched literally.
      {a,b}  -> (?:a|b); '}' and ',' are only special inside braces
      \\x     -> x matched literally
    Every other character is passed through re.escape().
    """
    i, n = 0, len(pat)
    # collect the pieces and join once at the end instead of growing a
    # string one fragment at a time
    res = []
    group = 0
    while i < n:
        c = pat[i]
        i += 1
        if c == '*':
            # slicing yields '' at the end of the pattern, never IndexError
            if pat[i:i + 1] == '*':
                i += 1
                res.append('.*')
            else:
                res.append('[^/]*')
        elif c == '?':
            res.append('.')
        elif c == '[':
            j = i
            # a ']' (or '!') directly after '[' belongs to the class
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # no closing bracket: treat '[' as an ordinary character
                res.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                res.append('[%s]' % stuff)
        elif c == '{':
            group += 1
            res.append('(?:')
        elif c == '}' and group:
            res.append(')')
            group -= 1
        elif c == ',' and group:
            res.append('|')
        elif c == '\\':
            p = pat[i:i + 1]
            if p:
                i += 1
                res.append(re.escape(p))
            else:
                # trailing backslash stands for itself
                res.append(re.escape(c))
        else:
            res.append(re.escape(c))
    return head + ''.join(res) + tail
119
def _matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
    """build a function to match a set of file patterns

    arguments:
    canonroot - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    names - patterns to find
    inc - patterns to include
    exc - patterns to exclude
    dflt_pat - if a pattern in names has no explicit type, assume this one
               (inc and exc patterns without a type are always globs)

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to canonroot
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that doesn't have to match the start of a name
    '<something>' - one of the cases above, selected by the dflt_pat argument

    returns:
    a 3-tuple containing
    - list of roots (places where one should start a recursive walk of the fs);
      this often matches the explicit non-pattern names passed in, but also
      includes the initial part of glob: patterns that have no glob characters
    - a bool match(filename) function
    - a bool that is true if inc or exc was given, or if any entry of
      names is a glob or regexp pattern

    raises util.Abort if a pattern does not compile as a regular expression.
    The list defaults are only read, never mutated.
    """

    # a common case: no patterns at all
    if not names and not inc and not exc:
        return [], util.always, False

    def contains_glob(name):
        '''true if name holds at least one glob metacharacter'''
        return bool(_globchars.intersection(name))

    def regex(kind, name, tail):
        '''convert a pattern into a regular expression'''
        if not name:
            # an empty pattern becomes an empty alternative (matches anything)
            return ''
        if kind == 're':
            return name
        elif kind == 'path':
            return '^' + re.escape(name) + '(?:/|$)'
        elif kind == 'relglob':
            return _globre(name, '(?:|.*/)', tail)
        elif kind == 'relpath':
            return re.escape(name) + '(?:/|$)'
        elif kind == 'relre':
            if name.startswith('^'):
                return name
            return '.*' + name
        # everything else is handled as a rooted glob
        return _globre(name, '', tail)

    def matchfn(pats, tail):
        """build a matching function from a set of patterns

        Returns None when pats is empty."""
        if not pats:
            return None
        try:
            pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
            if len(pat) > 20000:
                raise OverflowError()
            return re.compile(pat).match
        except OverflowError:
            # We're using a Python with a tiny regex engine and we
            # made it explode, so we'll divide the pattern list in two
            # until it works
            count = len(pats)
            if count < 2:
                # a single pattern cannot be split any further
                raise
            half = count // 2
            a, b = matchfn(pats[:half], tail), matchfn(pats[half:], tail)
            return lambda s: a(s) or b(s)
        except re.error:
            # recompile one by one so the message can name the culprit
            for k, p in pats:
                try:
                    re.compile('(?:%s)' % regex(k, p, tail))
                except re.error:
                    raise util.Abort("invalid pattern (%s): %s" % (k, p))
            raise util.Abort("invalid pattern")

    def globprefix(pat):
        '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
        root = []
        for p in pat.split('/'):
            if contains_glob(p):
                break
            root.append(p)
        return '/'.join(root) or '.'

    def normalizepats(names, default):
        '''return (roots, [(kind, name), ...], anypats) for the patterns'''
        pats = []
        roots = []
        anypats = False
        for kind, name in [_patsplit(p, default) for p in names]:
            if kind in ('glob', 'relpath'):
                name = util.canonpath(canonroot, cwd, name)
            elif kind in ('relglob', 'path'):
                name = util.normpath(name)

            pats.append((kind, name))

            if kind in ('glob', 're', 'relglob', 'relre'):
                anypats = True

            if kind == 'glob':
                roots.append(globprefix(name))
            elif kind in ('relpath', 'path'):
                roots.append(name or '.')
            elif kind == 'relglob':
                roots.append('.')
        return roots, pats, anypats

    roots, pats, anypats = normalizepats(names, dflt_pat)

    patmatch = matchfn(pats, '$') or util.always
    incmatch = util.always
    if inc:
        dummy, inckinds, dummy = normalizepats(inc, 'glob')
        incmatch = matchfn(inckinds, '(?:/|$)')
    excmatch = util.never
    if exc:
        dummy, exckinds, dummy = normalizepats(exc, 'glob')
        excmatch = matchfn(exckinds, '(?:/|$)')

    if not names and inc and not exc:
        # common case: hgignore patterns
        matcher = incmatch
    else:
        matcher = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)

    return (roots, matcher, bool(inc or exc or anypats))
@@ -207,67 +207,6 Abort = error.Abort
# predicate that accepts every argument
207 207 def always(fn): return True
# predicate that rejects every argument
208 208 def never(fn): return False
209 209
def _patsplit(pat, default):
    """Split a string into an optional pattern kind prefix and the
    actual pattern.

    Returns a (kind, pattern) tuple.  kind is one of 're', 'glob', 'path',
    'relglob', 'relpath' or 'relre' when pat starts with '<kind>:';
    otherwise kind is default and pattern is pat unchanged.
    """
    if ':' in pat:
        # only the first colon separates the kind from the pattern
        kind, name = pat.split(':', 1)
        if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
            return kind, name
    return default, pat
216
def _globre(pat, head='^', tail='$'):
    """convert a glob pattern into a regexp

    Returns head + <translated pat> + tail as a string.  Translation:
      *      -> [^/]*   (anything except a slash)
      **     -> .*      (anything, slashes included)
      ?      -> .
      [...]  -> a character class; a leading '!' negates it and a
                leading '^' is taken literally.  An unterminated '['
                is matched literally.
      {a,b}  -> (?:a|b); '}' and ',' are only special inside braces
      \\x     -> x matched literally
    Every other character is passed through re.escape().
    """
    i, n = 0, len(pat)
    # collect the pieces and join once at the end instead of growing a
    # string one fragment at a time
    res = []
    group = 0
    while i < n:
        c = pat[i]
        i += 1
        if c == '*':
            # slicing yields '' at the end of the pattern, never IndexError
            if pat[i:i + 1] == '*':
                i += 1
                res.append('.*')
            else:
                res.append('[^/]*')
        elif c == '?':
            res.append('.')
        elif c == '[':
            j = i
            # a ']' (or '!') directly after '[' belongs to the class
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # no closing bracket: treat '[' as an ordinary character
                res.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                res.append('[%s]' % stuff)
        elif c == '{':
            group += 1
            res.append('(?:')
        elif c == '}' and group:
            res.append(')')
            group -= 1
        elif c == ',' and group:
            res.append('|')
        elif c == '\\':
            p = pat[i:i + 1]
            if p:
                i += 1
                res.append(re.escape(p))
            else:
                # trailing backslash stands for itself
                res.append(re.escape(c))
        else:
            res.append(re.escape(c))
    return head + ''.join(res) + tail
268
# characters that mark a name as a glob; contains_glob() in matcher()
# tests each character of a path component against this set
269 _globchars = set('[{*?')
270
271 210 def pathto(root, n1, n2):
272 211 '''return the relative path from one place to another.
273 212 root should use os.sep to separate directories
@@ -342,140 +281,6 def canonpath(root, cwd, myname):
342 281
343 282 raise Abort('%s not under root' % myname)
344 283
def matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
    """build a function to match a set of file patterns

    arguments:
    canonroot - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    names - patterns to find
    inc - patterns to include
    exc - patterns to exclude
    dflt_pat - if a pattern in names has no explicit type, assume this one
               (inc and exc patterns without a type are always globs)

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to canonroot
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that doesn't have to match the start of a name
    '<something>' - one of the cases above, selected by the dflt_pat argument

    returns:
    a 3-tuple containing
    - list of roots (places where one should start a recursive walk of the fs);
      this often matches the explicit non-pattern names passed in, but also
      includes the initial part of glob: patterns that have no glob characters
    - a bool match(filename) function
    - a bool that is true if inc or exc was given, or if any entry of
      names is a glob or regexp pattern

    raises Abort if a pattern does not compile as a regular expression.
    The list defaults are only read, never mutated.
    """

    # a common case: no patterns at all
    if not names and not inc and not exc:
        return [], always, False

    def contains_glob(name):
        '''true if name holds at least one glob metacharacter'''
        return bool(_globchars.intersection(name))

    def regex(kind, name, tail):
        '''convert a pattern into a regular expression'''
        if not name:
            # an empty pattern becomes an empty alternative (matches anything)
            return ''
        if kind == 're':
            return name
        elif kind == 'path':
            return '^' + re.escape(name) + '(?:/|$)'
        elif kind == 'relglob':
            return _globre(name, '(?:|.*/)', tail)
        elif kind == 'relpath':
            return re.escape(name) + '(?:/|$)'
        elif kind == 'relre':
            if name.startswith('^'):
                return name
            return '.*' + name
        # everything else is handled as a rooted glob
        return _globre(name, '', tail)

    def matchfn(pats, tail):
        """build a matching function from a set of patterns

        Returns None when pats is empty."""
        if not pats:
            return None
        try:
            pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
            if len(pat) > 20000:
                raise OverflowError()
            return re.compile(pat).match
        except OverflowError:
            # We're using a Python with a tiny regex engine and we
            # made it explode, so we'll divide the pattern list in two
            # until it works
            count = len(pats)
            if count < 2:
                # a single pattern cannot be split any further
                raise
            half = count // 2
            a, b = matchfn(pats[:half], tail), matchfn(pats[half:], tail)
            return lambda s: a(s) or b(s)
        except re.error:
            # recompile one by one so the message can name the culprit
            for k, p in pats:
                try:
                    re.compile('(?:%s)' % regex(k, p, tail))
                except re.error:
                    raise Abort("invalid pattern (%s): %s" % (k, p))
            raise Abort("invalid pattern")

    def globprefix(pat):
        '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
        root = []
        for p in pat.split('/'):
            if contains_glob(p):
                break
            root.append(p)
        return '/'.join(root) or '.'

    def normalizepats(names, default):
        '''return (roots, [(kind, name), ...], anypats) for the patterns'''
        pats = []
        roots = []
        anypats = False
        for kind, name in [_patsplit(p, default) for p in names]:
            if kind in ('glob', 'relpath'):
                name = canonpath(canonroot, cwd, name)
            elif kind in ('relglob', 'path'):
                name = normpath(name)

            pats.append((kind, name))

            if kind in ('glob', 're', 'relglob', 'relre'):
                anypats = True

            if kind == 'glob':
                roots.append(globprefix(name))
            elif kind in ('relpath', 'path'):
                roots.append(name or '.')
            elif kind == 'relglob':
                roots.append('.')
        return roots, pats, anypats

    roots, pats, anypats = normalizepats(names, dflt_pat)

    patmatch = matchfn(pats, '$') or always
    incmatch = always
    if inc:
        dummy, inckinds, dummy = normalizepats(inc, 'glob')
        incmatch = matchfn(inckinds, '(?:/|$)')
    excmatch = never
    if exc:
        dummy, exckinds, dummy = normalizepats(exc, 'glob')
        excmatch = matchfn(exckinds, '(?:/|$)')

    if not names and inc and not exc:
        # common case: hgignore patterns
        match = incmatch
    else:
        match = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)

    return (roots, match, bool(inc or exc or anypats))
478
479 284 _hgexecutable = None
480 285
481 286 def main_is_frozen():
General Comments 0
You need to be logged in to leave comments. Login now