match: move util match functions over
Author: Matt Mackall
Changeset: r8570:7fe2012b (branch: default)
@@ -5,7 +5,7 b''
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 import util
8 import util, re
9
9
10 class _match(object):
10 class _match(object):
11 def __init__(self, root, cwd, files, mf, ap):
11 def __init__(self, root, cwd, files, mf, ap):
@@ -50,10 +50,203 b' class exact(_match):'
50 class match(_match):
50 class match(_match):
51 def __init__(self, root, cwd, patterns, include=[], exclude=[],
51 def __init__(self, root, cwd, patterns, include=[], exclude=[],
52 default='glob'):
52 default='glob'):
53 f, mf, ap = util.matcher(root, cwd, patterns, include, exclude,
53 f, mf, ap = _matcher(root, cwd, patterns, include, exclude, default)
54 default)
55 _match.__init__(self, root, cwd, f, mf, ap)
54 _match.__init__(self, root, cwd, f, mf, ap)
56
55
57 def patkind(pat):
56 def patkind(pat):
58 return util._patsplit(pat, None)[0]
57 return _patsplit(pat, None)[0]
58
59 def _patsplit(pat, default):
60 """Split a string into an optional pattern kind prefix and the
61 actual pattern."""
62 for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
63 if pat.startswith(prefix + ':'): return pat.split(':', 1)
64 return default, pat
65
66 _globchars = set('[{*?')
67
68 def _globre(pat, head='^', tail='$'):
69 "convert a glob pattern into a regexp"
70 i, n = 0, len(pat)
71 res = ''
72 group = 0
73 def peek(): return i < n and pat[i]
74 while i < n:
75 c = pat[i]
76 i = i+1
77 if c == '*':
78 if peek() == '*':
79 i += 1
80 res += '.*'
81 else:
82 res += '[^/]*'
83 elif c == '?':
84 res += '.'
85 elif c == '[':
86 j = i
87 if j < n and pat[j] in '!]':
88 j += 1
89 while j < n and pat[j] != ']':
90 j += 1
91 if j >= n:
92 res += '\\['
93 else:
94 stuff = pat[i:j].replace('\\','\\\\')
95 i = j + 1
96 if stuff[0] == '!':
97 stuff = '^' + stuff[1:]
98 elif stuff[0] == '^':
99 stuff = '\\' + stuff
100 res = '%s[%s]' % (res, stuff)
101 elif c == '{':
102 group += 1
103 res += '(?:'
104 elif c == '}' and group:
105 res += ')'
106 group -= 1
107 elif c == ',' and group:
108 res += '|'
109 elif c == '\\':
110 p = peek()
111 if p:
112 i += 1
113 res += re.escape(p)
114 else:
115 res += re.escape(c)
116 else:
117 res += re.escape(c)
118 return head + res + tail
119
120 def _matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
121 """build a function to match a set of file patterns
122
123 arguments:
124 canonroot - the canonical root of the tree you're matching against
125 cwd - the current working directory, if relevant
126 names - patterns to find
127 inc - patterns to include
128 exc - patterns to exclude
129 dflt_pat - if a pattern in names has no explicit type, assume this one
130
131 a pattern is one of:
132 'glob:<glob>' - a glob relative to cwd
133 're:<regexp>' - a regular expression
134 'path:<path>' - a path relative to canonroot
135 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
136 'relpath:<path>' - a path relative to cwd
137 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
138 '<something>' - one of the cases above, selected by the dflt_pat argument
139
140 returns:
141 a 3-tuple containing
142 - list of roots (places where one should start a recursive walk of the fs);
143 this often matches the explicit non-pattern names passed in, but also
144 includes the initial part of glob: patterns that has no glob characters
145 - a bool match(filename) function
146 - a bool indicating if any patterns were passed in
147 """
148
149 # a common case: no patterns at all
150 if not names and not inc and not exc:
151 return [], util.always, False
59
152
153 def contains_glob(name):
154 for c in name:
155 if c in _globchars: return True
156 return False
157
158 def regex(kind, name, tail):
159 '''convert a pattern into a regular expression'''
160 if not name:
161 return ''
162 if kind == 're':
163 return name
164 elif kind == 'path':
165 return '^' + re.escape(name) + '(?:/|$)'
166 elif kind == 'relglob':
167 return _globre(name, '(?:|.*/)', tail)
168 elif kind == 'relpath':
169 return re.escape(name) + '(?:/|$)'
170 elif kind == 'relre':
171 if name.startswith('^'):
172 return name
173 return '.*' + name
174 return _globre(name, '', tail)
175
176 def matchfn(pats, tail):
177 """build a matching function from a set of patterns"""
178 if not pats:
179 return
180 try:
181 pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
182 if len(pat) > 20000:
183 raise OverflowError()
184 return re.compile(pat).match
185 except OverflowError:
186 # We're using a Python with a tiny regex engine and we
187 # made it explode, so we'll divide the pattern list in two
188 # until it works
189 l = len(pats)
190 if l < 2:
191 raise
192 a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
193 return lambda s: a(s) or b(s)
194 except re.error:
195 for k, p in pats:
196 try:
197 re.compile('(?:%s)' % regex(k, p, tail))
198 except re.error:
199 raise util.Abort("invalid pattern (%s): %s" % (k, p))
200 raise util.Abort("invalid pattern")
201
202 def globprefix(pat):
203 '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
204 root = []
205 for p in pat.split('/'):
206 if contains_glob(p): break
207 root.append(p)
208 return '/'.join(root) or '.'
209
210 def normalizepats(names, default):
211 pats = []
212 roots = []
213 anypats = False
214 for kind, name in [_patsplit(p, default) for p in names]:
215 if kind in ('glob', 'relpath'):
216 name = util.canonpath(canonroot, cwd, name)
217 elif kind in ('relglob', 'path'):
218 name = util.normpath(name)
219
220 pats.append((kind, name))
221
222 if kind in ('glob', 're', 'relglob', 'relre'):
223 anypats = True
224
225 if kind == 'glob':
226 root = globprefix(name)
227 roots.append(root)
228 elif kind in ('relpath', 'path'):
229 roots.append(name or '.')
230 elif kind == 'relglob':
231 roots.append('.')
232 return roots, pats, anypats
233
234 roots, pats, anypats = normalizepats(names, dflt_pat)
235
236 patmatch = matchfn(pats, '$') or util.always
237 incmatch = util.always
238 if inc:
239 dummy, inckinds, dummy = normalizepats(inc, 'glob')
240 incmatch = matchfn(inckinds, '(?:/|$)')
241 excmatch = util.never
242 if exc:
243 dummy, exckinds, dummy = normalizepats(exc, 'glob')
244 excmatch = matchfn(exckinds, '(?:/|$)')
245
246 if not names and inc and not exc:
247 # common case: hgignore patterns
248 matcher = incmatch
249 else:
250 matcher = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)
251
252 return (roots, matcher, (inc or exc or anypats) and True)
@@ -207,67 +207,6 b' Abort = error.Abort'
207 def always(fn): return True
207 def always(fn): return True
208 def never(fn): return False
208 def never(fn): return False
209
209
210 def _patsplit(pat, default):
211 """Split a string into an optional pattern kind prefix and the
212 actual pattern."""
213 for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
214 if pat.startswith(prefix + ':'): return pat.split(':', 1)
215 return default, pat
216
217 def _globre(pat, head='^', tail='$'):
218 "convert a glob pattern into a regexp"
219 i, n = 0, len(pat)
220 res = ''
221 group = 0
222 def peek(): return i < n and pat[i]
223 while i < n:
224 c = pat[i]
225 i = i+1
226 if c == '*':
227 if peek() == '*':
228 i += 1
229 res += '.*'
230 else:
231 res += '[^/]*'
232 elif c == '?':
233 res += '.'
234 elif c == '[':
235 j = i
236 if j < n and pat[j] in '!]':
237 j += 1
238 while j < n and pat[j] != ']':
239 j += 1
240 if j >= n:
241 res += '\\['
242 else:
243 stuff = pat[i:j].replace('\\','\\\\')
244 i = j + 1
245 if stuff[0] == '!':
246 stuff = '^' + stuff[1:]
247 elif stuff[0] == '^':
248 stuff = '\\' + stuff
249 res = '%s[%s]' % (res, stuff)
250 elif c == '{':
251 group += 1
252 res += '(?:'
253 elif c == '}' and group:
254 res += ')'
255 group -= 1
256 elif c == ',' and group:
257 res += '|'
258 elif c == '\\':
259 p = peek()
260 if p:
261 i += 1
262 res += re.escape(p)
263 else:
264 res += re.escape(c)
265 else:
266 res += re.escape(c)
267 return head + res + tail
268
269 _globchars = set('[{*?')
270
271 def pathto(root, n1, n2):
210 def pathto(root, n1, n2):
272 '''return the relative path from one place to another.
211 '''return the relative path from one place to another.
273 root should use os.sep to separate directories
212 root should use os.sep to separate directories
@@ -342,140 +281,6 b' def canonpath(root, cwd, myname):'
342
281
343 raise Abort('%s not under root' % myname)
282 raise Abort('%s not under root' % myname)
344
283
345 def matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
346 """build a function to match a set of file patterns
347
348 arguments:
349 canonroot - the canonical root of the tree you're matching against
350 cwd - the current working directory, if relevant
351 names - patterns to find
352 inc - patterns to include
353 exc - patterns to exclude
354 dflt_pat - if a pattern in names has no explicit type, assume this one
355
356 a pattern is one of:
357 'glob:<glob>' - a glob relative to cwd
358 're:<regexp>' - a regular expression
359 'path:<path>' - a path relative to canonroot
360 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
361 'relpath:<path>' - a path relative to cwd
362 'relre:<regexp>' - a regexp that doesn't have to match the start of a name
363 '<something>' - one of the cases above, selected by the dflt_pat argument
364
365 returns:
366 a 3-tuple containing
367 - list of roots (places where one should start a recursive walk of the fs);
368 this often matches the explicit non-pattern names passed in, but also
369 includes the initial part of glob: patterns that has no glob characters
370 - a bool match(filename) function
371 - a bool indicating if any patterns were passed in
372 """
373
374 # a common case: no patterns at all
375 if not names and not inc and not exc:
376 return [], always, False
377
378 def contains_glob(name):
379 for c in name:
380 if c in _globchars: return True
381 return False
382
383 def regex(kind, name, tail):
384 '''convert a pattern into a regular expression'''
385 if not name:
386 return ''
387 if kind == 're':
388 return name
389 elif kind == 'path':
390 return '^' + re.escape(name) + '(?:/|$)'
391 elif kind == 'relglob':
392 return _globre(name, '(?:|.*/)', tail)
393 elif kind == 'relpath':
394 return re.escape(name) + '(?:/|$)'
395 elif kind == 'relre':
396 if name.startswith('^'):
397 return name
398 return '.*' + name
399 return _globre(name, '', tail)
400
401 def matchfn(pats, tail):
402 """build a matching function from a set of patterns"""
403 if not pats:
404 return
405 try:
406 pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
407 if len(pat) > 20000:
408 raise OverflowError()
409 return re.compile(pat).match
410 except OverflowError:
411 # We're using a Python with a tiny regex engine and we
412 # made it explode, so we'll divide the pattern list in two
413 # until it works
414 l = len(pats)
415 if l < 2:
416 raise
417 a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
418 return lambda s: a(s) or b(s)
419 except re.error:
420 for k, p in pats:
421 try:
422 re.compile('(?:%s)' % regex(k, p, tail))
423 except re.error:
424 raise Abort("invalid pattern (%s): %s" % (k, p))
425 raise Abort("invalid pattern")
426
427 def globprefix(pat):
428 '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
429 root = []
430 for p in pat.split('/'):
431 if contains_glob(p): break
432 root.append(p)
433 return '/'.join(root) or '.'
434
435 def normalizepats(names, default):
436 pats = []
437 roots = []
438 anypats = False
439 for kind, name in [_patsplit(p, default) for p in names]:
440 if kind in ('glob', 'relpath'):
441 name = canonpath(canonroot, cwd, name)
442 elif kind in ('relglob', 'path'):
443 name = normpath(name)
444
445 pats.append((kind, name))
446
447 if kind in ('glob', 're', 'relglob', 'relre'):
448 anypats = True
449
450 if kind == 'glob':
451 root = globprefix(name)
452 roots.append(root)
453 elif kind in ('relpath', 'path'):
454 roots.append(name or '.')
455 elif kind == 'relglob':
456 roots.append('.')
457 return roots, pats, anypats
458
459 roots, pats, anypats = normalizepats(names, dflt_pat)
460
461 patmatch = matchfn(pats, '$') or always
462 incmatch = always
463 if inc:
464 dummy, inckinds, dummy = normalizepats(inc, 'glob')
465 incmatch = matchfn(inckinds, '(?:/|$)')
466 excmatch = never
467 if exc:
468 dummy, exckinds, dummy = normalizepats(exc, 'glob')
469 excmatch = matchfn(exckinds, '(?:/|$)')
470
471 if not names and inc and not exc:
472 # common case: hgignore patterns
473 match = incmatch
474 else:
475 match = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)
476
477 return (roots, match, (inc or exc or anypats) and True)
478
479 _hgexecutable = None
284 _hgexecutable = None
480
285
481 def main_is_frozen():
286 def main_is_frozen():
General Comments 0
You need to be logged in to leave comments. Login now