upstream/mercurial-mirror Commit - r8570:7fe2012b

match: move util match functions over

Matt Mackall -

r8570:7fe2012b default

parent child

mercurial/match.py

0 +197 -4

@@ -5,7 +5,7 b''
5	# This software may be used and distributed according to the terms of the	5	# This software may be used and distributed according to the terms of the
6	# GNU General Public License version 2, incorporated herein by reference.	6	# GNU General Public License version 2, incorporated herein by reference.
7		7
8	import util	8	import util, re
9		9
10	class _match(object):	10	class _match(object):
11	def __init__(self, root, cwd, files, mf, ap):	11	def __init__(self, root, cwd, files, mf, ap):
@@ -50,10 +50,203 b' class exact(_match):'
50	class match(_match):	50	class match(_match):
51	def __init__(self, root, cwd, patterns, include=[], exclude=[],	51	def __init__(self, root, cwd, patterns, include=[], exclude=[],
52	default='glob'):	52	default='glob'):
53	f, mf, ap = ~~util~~.matcher(root, cwd, patterns, include, exclude,	53	f, mf, ap = _matcher(root, cwd, patterns, include, exclude, default)
54	default)
55	_match.__init__(self, root, cwd, f, mf, ap)	54	_match.__init__(self, root, cwd, f, mf, ap)
56		55
57	def patkind(pat):	56	def patkind(pat):
58	return ~~util~~._patsplit(pat, None)[0]	57	return _patsplit(pat, None)[0]
		58
		59	def _patsplit(pat, default):
		60	"""Split a string into an optional pattern kind prefix and the
		61	actual pattern."""
		62	for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
		63	if pat.startswith(prefix + ':'): return pat.split(':', 1)
		64	return default, pat
		65
		66	_globchars = set('[{*?')
		67
		68	def _globre(pat, head='^', tail='$'):
		69	"convert a glob pattern into a regexp"
		70	i, n = 0, len(pat)
		71	res = ''
		72	group = 0
		73	def peek(): return i < n and pat[i]
		74	while i < n:
		75	c = pat[i]
		76	i = i+1
		77	if c == '*':
		78	if peek() == '*':
		79	i += 1
		80	res += '.*'
		81	else:
		82	res += '[^/]*'
		83	elif c == '?':
		84	res += '.'
		85	elif c == '[':
		86	j = i
		87	if j < n and pat[j] in '!]':
		88	j += 1
		89	while j < n and pat[j] != ']':
		90	j += 1
		91	if j >= n:
		92	res += '\\['
		93	else:
		94	stuff = pat[i:j].replace('\\','\\\\')
		95	i = j + 1
		96	if stuff[0] == '!':
		97	stuff = '^' + stuff[1:]
		98	elif stuff[0] == '^':
		99	stuff = '\\' + stuff
		100	res = '%s[%s]' % (res, stuff)
		101	elif c == '{':
		102	group += 1
		103	res += '(?:'
		104	elif c == '}' and group:
		105	res += ')'
		106	group -= 1
		107	elif c == ',' and group:
		108	res += '|'
		109	elif c == '\\':
		110	p = peek()
		111	if p:
		112	i += 1
		113	res += re.escape(p)
		114	else:
		115	res += re.escape(c)
		116	else:
		117	res += re.escape(c)
		118	return head + res + tail
		119
		120	def _matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
		121	"""build a function to match a set of file patterns
		122
		123	arguments:
		124	canonroot - the canonical root of the tree you're matching against
		125	cwd - the current working directory, if relevant
		126	names - patterns to find
		127	inc - patterns to include
		128	exc - patterns to exclude
		129	dflt_pat - if a pattern in names has no explicit type, assume this one
		130
		131	a pattern is one of:
		132	'glob:<glob>' - a glob relative to cwd
		133	're:<regexp>' - a regular expression
		134	'path:<path>' - a path relative to canonroot
		135	'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
		136	'relpath:<path>' - a path relative to cwd
		137	'relre:<regexp>' - a regexp that doesn't have to match the start of a name
		138	'<something>' - one of the cases above, selected by the dflt_pat argument
		139
		140	returns:
		141	a 3-tuple containing
		142	- list of roots (places where one should start a recursive walk of the fs);
		143	this often matches the explicit non-pattern names passed in, but also
		144	includes the initial part of glob: patterns that has no glob characters
		145	- a bool match(filename) function
		146	- a bool indicating if any patterns were passed in
		147	"""
		148
		149	# a common case: no patterns at all
		150	if not names and not inc and not exc:
		151	return [], util.always, False
59		152
		153	def contains_glob(name):
		154	for c in name:
		155	if c in _globchars: return True
		156	return False
		157
		158	def regex(kind, name, tail):
		159	'''convert a pattern into a regular expression'''
		160	if not name:
		161	return ''
		162	if kind == 're':
		163	return name
		164	elif kind == 'path':
		165	return '^' + re.escape(name) + '(?:/|$)'
		166	elif kind == 'relglob':
		167	return _globre(name, '(?:|.*/)', tail)
		168	elif kind == 'relpath':
		169	return re.escape(name) + '(?:/|$)'
		170	elif kind == 'relre':
		171	if name.startswith('^'):
		172	return name
		173	return '.*' + name
		174	return _globre(name, '', tail)
		175
		176	def matchfn(pats, tail):
		177	"""build a matching function from a set of patterns"""
		178	if not pats:
		179	return
		180	try:
		181	pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
		182	if len(pat) > 20000:
		183	raise OverflowError()
		184	return re.compile(pat).match
		185	except OverflowError:
		186	# We're using a Python with a tiny regex engine and we
		187	# made it explode, so we'll divide the pattern list in two
		188	# until it works
		189	l = len(pats)
		190	if l < 2:
		191	raise
		192	a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
		193	return lambda s: a(s) or b(s)
		194	except re.error:
		195	for k, p in pats:
		196	try:
		197	re.compile('(?:%s)' % regex(k, p, tail))
		198	except re.error:
		199	raise util.Abort("invalid pattern (%s): %s" % (k, p))
		200	raise util.Abort("invalid pattern")
		201
		202	def globprefix(pat):
		203	'''return the non-glob prefix of a path, e.g. foo/* -> foo'''
		204	root = []
		205	for p in pat.split('/'):
		206	if contains_glob(p): break
		207	root.append(p)
		208	return '/'.join(root) or '.'
		209
		210	def normalizepats(names, default):
		211	pats = []
		212	roots = []
		213	anypats = False
		214	for kind, name in [_patsplit(p, default) for p in names]:
		215	if kind in ('glob', 'relpath'):
		216	name = util.canonpath(canonroot, cwd, name)
		217	elif kind in ('relglob', 'path'):
		218	name = util.normpath(name)
		219
		220	pats.append((kind, name))
		221
		222	if kind in ('glob', 're', 'relglob', 'relre'):
		223	anypats = True
		224
		225	if kind == 'glob':
		226	root = globprefix(name)
		227	roots.append(root)
		228	elif kind in ('relpath', 'path'):
		229	roots.append(name or '.')
		230	elif kind == 'relglob':
		231	roots.append('.')
		232	return roots, pats, anypats
		233
		234	roots, pats, anypats = normalizepats(names, dflt_pat)
		235
		236	patmatch = matchfn(pats, '$') or util.always
		237	incmatch = util.always
		238	if inc:
		239	dummy, inckinds, dummy = normalizepats(inc, 'glob')
		240	incmatch = matchfn(inckinds, '(?:/|$)')
		241	excmatch = util.never
		242	if exc:
		243	dummy, exckinds, dummy = normalizepats(exc, 'glob')
		244	excmatch = matchfn(exckinds, '(?:/|$)')
		245
		246	if not names and inc and not exc:
		247	# common case: hgignore patterns
		248	matcher = incmatch
		249	else:
		250	matcher = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)
		251
		252	return (roots, matcher, (inc or exc or anypats) and True)

mercurial/util.py

0 0 -195

@@ -207,67 +207,6 b' Abort = error.Abort'
207	def always(fn): return True	207	def always(fn): return True
208	def never(fn): return False	208	def never(fn): return False
209		209
210	def _patsplit(pat, default):
211	"""Split a string into an optional pattern kind prefix and the
212	actual pattern."""
213	for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
214	if pat.startswith(prefix + ':'): return pat.split(':', 1)
215	return default, pat
216
217	def _globre(pat, head='^', tail='$'):
218	"convert a glob pattern into a regexp"
219	i, n = 0, len(pat)
220	res = ''
221	group = 0
222	def peek(): return i < n and pat[i]
223	while i < n:
224	c = pat[i]
225	i = i+1
226	if c == '*':
227	if peek() == '*':
228	i += 1
229	res += '.*'
230	else:
231	res += '[^/]*'
232	elif c == '?':
233	res += '.'
234	elif c == '[':
235	j = i
236	if j < n and pat[j] in '!]':
237	j += 1
238	while j < n and pat[j] != ']':
239	j += 1
240	if j >= n:
241	res += '\\['
242	else:
243	stuff = pat[i:j].replace('\\','\\\\')
244	i = j + 1
245	if stuff[0] == '!':
246	stuff = '^' + stuff[1:]
247	elif stuff[0] == '^':
248	stuff = '\\' + stuff
249	res = '%s[%s]' % (res, stuff)
250	elif c == '{':
251	group += 1
252	res += '(?:'
253	elif c == '}' and group:
254	res += ')'
255	group -= 1
256	elif c == ',' and group:
257	res += '|'
258	elif c == '\\':
259	p = peek()
260	if p:
261	i += 1
262	res += re.escape(p)
263	else:
264	res += re.escape(c)
265	else:
266	res += re.escape(c)
267	return head + res + tail
268
269	_globchars = set('[{*?')
270
271	def pathto(root, n1, n2):	210	def pathto(root, n1, n2):
272	'''return the relative path from one place to another.	211	'''return the relative path from one place to another.
273	root should use os.sep to separate directories	212	root should use os.sep to separate directories
@@ -342,140 +281,6 b' def canonpath(root, cwd, myname):'
342		281
343	raise Abort('%s not under root' % myname)	282	raise Abort('%s not under root' % myname)
344		283
345	def matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
346	"""build a function to match a set of file patterns
347
348	arguments:
349	canonroot - the canonical root of the tree you're matching against
350	cwd - the current working directory, if relevant
351	names - patterns to find
352	inc - patterns to include
353	exc - patterns to exclude
354	dflt_pat - if a pattern in names has no explicit type, assume this one
355
356	a pattern is one of:
357	'glob:<glob>' - a glob relative to cwd
358	're:<regexp>' - a regular expression
359	'path:<path>' - a path relative to canonroot
360	'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
361	'relpath:<path>' - a path relative to cwd
362	'relre:<regexp>' - a regexp that doesn't have to match the start of a name
363	'<something>' - one of the cases above, selected by the dflt_pat argument
364
365	returns:
366	a 3-tuple containing
367	- list of roots (places where one should start a recursive walk of the fs);
368	this often matches the explicit non-pattern names passed in, but also
369	includes the initial part of glob: patterns that has no glob characters
370	- a bool match(filename) function
371	- a bool indicating if any patterns were passed in
372	"""
373
374	# a common case: no patterns at all
375	if not names and not inc and not exc:
376	return [], always, False
377
378	def contains_glob(name):
379	for c in name:
380	if c in _globchars: return True
381	return False
382
383	def regex(kind, name, tail):
384	'''convert a pattern into a regular expression'''
385	if not name:
386	return ''
387	if kind == 're':
388	return name
389	elif kind == 'path':
390	return '^' + re.escape(name) + '(?:/|$)'
391	elif kind == 'relglob':
392	return _globre(name, '(?:|.*/)', tail)
393	elif kind == 'relpath':
394	return re.escape(name) + '(?:/|$)'
395	elif kind == 'relre':
396	if name.startswith('^'):
397	return name
398	return '.*' + name
399	return _globre(name, '', tail)
400
401	def matchfn(pats, tail):
402	"""build a matching function from a set of patterns"""
403	if not pats:
404	return
405	try:
406	pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
407	if len(pat) > 20000:
408	raise OverflowError()
409	return re.compile(pat).match
410	except OverflowError:
411	# We're using a Python with a tiny regex engine and we
412	# made it explode, so we'll divide the pattern list in two
413	# until it works
414	l = len(pats)
415	if l < 2:
416	raise
417	a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
418	return lambda s: a(s) or b(s)
419	except re.error:
420	for k, p in pats:
421	try:
422	re.compile('(?:%s)' % regex(k, p, tail))
423	except re.error:
424	raise Abort("invalid pattern (%s): %s" % (k, p))
425	raise Abort("invalid pattern")
426
427	def globprefix(pat):
428	'''return the non-glob prefix of a path, e.g. foo/* -> foo'''
429	root = []
430	for p in pat.split('/'):
431	if contains_glob(p): break
432	root.append(p)
433	return '/'.join(root) or '.'
434
435	def normalizepats(names, default):
436	pats = []
437	roots = []
438	anypats = False
439	for kind, name in [_patsplit(p, default) for p in names]:
440	if kind in ('glob', 'relpath'):
441	name = canonpath(canonroot, cwd, name)
442	elif kind in ('relglob', 'path'):
443	name = normpath(name)
444
445	pats.append((kind, name))
446
447	if kind in ('glob', 're', 'relglob', 'relre'):
448	anypats = True
449
450	if kind == 'glob':
451	root = globprefix(name)
452	roots.append(root)
453	elif kind in ('relpath', 'path'):
454	roots.append(name or '.')
455	elif kind == 'relglob':
456	roots.append('.')
457	return roots, pats, anypats
458
459	roots, pats, anypats = normalizepats(names, dflt_pat)
460
461	patmatch = matchfn(pats, '$') or always
462	incmatch = always
463	if inc:
464	dummy, inckinds, dummy = normalizepats(inc, 'glob')
465	incmatch = matchfn(inckinds, '(?:/|$)')
466	excmatch = never
467	if exc:
468	dummy, exckinds, dummy = normalizepats(exc, 'glob')
469	excmatch = matchfn(exckinds, '(?:/|$)')
470
471	if not names and inc and not exc:
472	# common case: hgignore patterns
473	match = incmatch
474	else:
475	match = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)
476
477	return (roots, match, (inc or exc or anypats) and True)
478
479	_hgexecutable = None	284	_hgexecutable = None
480		285
481	def main_is_frozen():	286	def main_is_frozen():

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages