##// END OF EJS Templates
bdiff: gradually enable the popularity hack...
bdiff: gradually enable the popularity hack Patch from Jason Orendorff The lower the threshold, the stronger the popularity hack's influence. So at 3999 lines, the hack is disabled; and at 4000 lines, the hack is enabled at maximum strength (t=4). No source file in mercurial/crew is over 4000 lines. But there are, oh, a few such files in Mozilla. I can testify that this hack causes hg to generate some correct but eyebrow-raising patches. I think the hack should phase in gradually. The threshold should be high for small files where we don't need it so much. Like this: t = (bn < 31000) ? 1000000 / bn : bn / 1000; That would leave the popularity hack disabled for small files, then gradually phase it in: bn < 1000 -- t > bn (popularity hack is completely disabled) bn == 1000 -- t = 1000 (still effectively disabled) bn == 2000 -- t = 500 (only hits unusual files) bn == 10000 -- t = 100 (only hits especially common lines) bn == 31000 -- t = 31 (hack is at maximum power) bn == 32000 -- t = 32 (hack could backfire, ease off)

File last commit:

r9036:32e678f9 default
r9534:8e202431 default
Show More
match.py
249 lines | 7.5 KiB | text/x-python | PythonLexer
timeless
Generally replace "file name" with "filename" in help and comments.
r8761 # match.py - filename matching
Martin Geisler
match: add copyright and license header
r8231 #
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.
Alejandro Santos
split local and stdlib module imports (eases migration issues)
r9036 import re
import util
Matt Mackall
walk: introduce match objects
r6576
Matt Mackall
match: fold match into _match base class
r8587 class match(object):
Matt Mackall
match: add some default args
r8567 def __init__(self, root, cwd, patterns, include=[], exclude=[],
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 default='glob', exact=False):
Matt Mackall
match: fold _matcher into match.__init__
r8581 """build an object to match a set of file patterns
arguments:
root - the canonical root of the tree you're matching against
cwd - the current working directory, if relevant
patterns - patterns to find
include - patterns to include
exclude - patterns to exclude
default - if a pattern in names has no explicit type, assume this one
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 exact - patterns are actually literals
Matt Mackall
match: fold _matcher into match.__init__
r8581
a pattern is one of:
'glob:<glob>' - a glob relative to cwd
're:<regexp>' - a regular expression
'path:<path>' - a path relative to canonroot
'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
'relpath:<path>' - a path relative to cwd
Matt Mackall
match: fold match into _match base class
r8587 'relre:<regexp>' - a regexp that needn't match the start of a name
'<something>' - a pattern of the specified default type
Matt Mackall
match: fold _matcher into match.__init__
r8581 """
Matt Mackall
match: fold match into _match base class
r8587 self._root = root
self._cwd = cwd
self._files = []
self._anypats = bool(include or exclude)
Matt Mackall
match: fold _matcher into match.__init__
r8581
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 if include:
im = _buildmatch(_normalize(include, 'glob', root, cwd), '(?:/|$)')
if exclude:
em = _buildmatch(_normalize(exclude, 'glob', root, cwd), '(?:/|$)')
if exact:
Matt Mackall
match: fold match into _match base class
r8587 self._files = patterns
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 pm = self.exact
elif patterns:
Matt Mackall
match: fold _matcher into match.__init__
r8581 pats = _normalize(patterns, default, root, cwd)
Matt Mackall
match: fold match into _match base class
r8587 self._files = _roots(pats)
self._anypats = self._anypats or _anypats(pats)
Matt Mackall
match: fold _matcher into match.__init__
r8581 pm = _buildmatch(pats, '$')
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 if patterns or exact:
Matt Mackall
match: fold _matcher into match.__init__
r8581 if include:
if exclude:
m = lambda f: im(f) and not em(f) and pm(f)
else:
m = lambda f: im(f) and pm(f)
else:
if exclude:
m = lambda f: not em(f) and pm(f)
else:
m = pm
else:
if include:
if exclude:
m = lambda f: im(f) and not em(f)
else:
m = im
else:
if exclude:
m = lambda f: not em(f)
else:
m = lambda f: True
Matt Mackall
match: fold match into _match base class
r8587 self.matchfn = m
self._fmap = set(self._files)
def __call__(self, fn):
return self.matchfn(fn)
def __iter__(self):
for f in self._files:
yield f
def bad(self, f, msg):
Matt Mackall
match: document bad callback semantics
r8678 '''callback for each explicit file that can't be
found/accessed, with an error message
'''
Matt Mackall
match: ignore return of match.bad...
r8680 pass
Matt Mackall
match: fold match into _match base class
r8587 def dir(self, f):
pass
def missing(self, f):
pass
def exact(self, f):
return f in self._fmap
def rel(self, f):
return util.pathto(self._root, self._cwd, f)
def files(self):
return self._files
def anypats(self):
return self._anypats
Matt Mackall
match: refactor patkind...
r8568
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 class exact(match):
Matt Mackall
match: redefine always and never in terms of match and exact
r8585 def __init__(self, root, cwd, files):
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 match.__init__(self, root, cwd, files, exact = True)
Matt Mackall
match: redefine always and never in terms of match and exact
r8585
class always(match):
def __init__(self, root, cwd):
match.__init__(self, root, cwd, [])
Matt Mackall
match: refactor patkind...
r8568 def patkind(pat):
Matt Mackall
match: move util match functions over
r8570 return _patsplit(pat, None)[0]
def _patsplit(pat, default):
"""Split a string into an optional pattern kind prefix and the
actual pattern."""
Matt Mackall
match: optimize _patsplit
r8579 if ':' in pat:
Matt Mackall
match: fix _patsplit breakage with drive letters
r8613 kind, val = pat.split(':', 1)
if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
return kind, val
Matt Mackall
match: move util match functions over
r8570 return default, pat
Matt Mackall
match: remove head and tail args from _globre
r8582 def _globre(pat):
Matt Mackall
match: move util match functions over
r8570 "convert a glob pattern into a regexp"
i, n = 0, len(pat)
res = ''
group = 0
Matt Mackall
match: optimize escaping in _globre...
r8583 escape = re.escape
Matt Mackall
match: move util match functions over
r8570 def peek(): return i < n and pat[i]
while i < n:
c = pat[i]
i = i+1
Matt Mackall
match: optimize escaping in _globre...
r8583 if c not in '*?[{},\\':
res += escape(c)
elif c == '*':
Matt Mackall
match: move util match functions over
r8570 if peek() == '*':
i += 1
res += '.*'
else:
res += '[^/]*'
elif c == '?':
res += '.'
elif c == '[':
j = i
if j < n and pat[j] in '!]':
j += 1
while j < n and pat[j] != ']':
j += 1
if j >= n:
res += '\\['
else:
stuff = pat[i:j].replace('\\','\\\\')
i = j + 1
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] == '^':
stuff = '\\' + stuff
res = '%s[%s]' % (res, stuff)
elif c == '{':
group += 1
res += '(?:'
elif c == '}' and group:
res += ')'
group -= 1
elif c == ',' and group:
res += '|'
elif c == '\\':
p = peek()
if p:
i += 1
Matt Mackall
match: optimize escaping in _globre...
r8583 res += escape(p)
Matt Mackall
match: move util match functions over
r8570 else:
Matt Mackall
match: optimize escaping in _globre...
r8583 res += escape(c)
Matt Mackall
match: move util match functions over
r8570 else:
Matt Mackall
match: optimize escaping in _globre...
r8583 res += escape(c)
Matt Mackall
match: remove head and tail args from _globre
r8582 return res
Matt Mackall
match: move util match functions over
r8570
Matt Mackall
match: unnest functions in _matcher
r8574 def _regex(kind, name, tail):
'''convert a pattern into a regular expression'''
if not name:
return ''
if kind == 're':
return name
elif kind == 'path':
return '^' + re.escape(name) + '(?:/|$)'
elif kind == 'relglob':
Matt Mackall
match: remove head and tail args from _globre
r8582 return '(?:|.*/)' + _globre(name) + tail
Matt Mackall
match: unnest functions in _matcher
r8574 elif kind == 'relpath':
return re.escape(name) + '(?:/|$)'
elif kind == 'relre':
if name.startswith('^'):
return name
return '.*' + name
Matt Mackall
match: remove head and tail args from _globre
r8582 return _globre(name) + tail
Matt Mackall
match: unnest functions in _matcher
r8574
Matt Mackall
match: rename _matchfn to _buildmatch
r8580 def _buildmatch(pats, tail):
Matt Mackall
match: unnest functions in _matcher
r8574 """build a matching function from a set of patterns"""
try:
pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
if len(pat) > 20000:
raise OverflowError()
return re.compile(pat).match
except OverflowError:
# We're using a Python with a tiny regex engine and we
# made it explode, so we'll divide the pattern list in two
# until it works
l = len(pats)
if l < 2:
raise
Matt Mackall
match: rename _matchfn to _buildmatch
r8580 a, b = _buildmatch(pats[:l//2], tail), _buildmatch(pats[l//2:], tail)
Matt Mackall
match: unnest functions in _matcher
r8574 return lambda s: a(s) or b(s)
except re.error:
for k, p in pats:
try:
re.compile('(?:%s)' % _regex(k, p, tail))
except re.error:
raise util.Abort("invalid pattern (%s): %s" % (k, p))
raise util.Abort("invalid pattern")
Matt Mackall
match: tweak some names
r8578 def _normalize(names, default, root, cwd):
Matt Mackall
match: unnest functions in _matcher
r8574 pats = []
for kind, name in [_patsplit(p, default) for p in names]:
if kind in ('glob', 'relpath'):
Matt Mackall
match: tweak some names
r8578 name = util.canonpath(root, cwd, name)
Matt Mackall
match: unnest functions in _matcher
r8574 elif kind in ('relglob', 'path'):
name = util.normpath(name)
pats.append((kind, name))
Matt Mackall
match: split up _normalizepats
r8576 return pats
Matt Mackall
match: unnest functions in _matcher
r8574
Matt Mackall
match: split up _normalizepats
r8576 def _roots(patterns):
r = []
for kind, name in patterns:
Matt Mackall
match: fold _globprefix into _roots
r8584 if kind == 'glob': # find the non-glob prefix
root = []
for p in name.split('/'):
if '[' in p or '{' in p or '*' in p or '?' in p:
break
root.append(p)
r.append('/'.join(root) or '.')
Matt Mackall
match: unnest functions in _matcher
r8574 elif kind in ('relpath', 'path'):
Matt Mackall
match: split up _normalizepats
r8576 r.append(name or '.')
Matt Mackall
match: unnest functions in _matcher
r8574 elif kind == 'relglob':
Matt Mackall
match: split up _normalizepats
r8576 r.append('.')
return r
def _anypats(patterns):
for kind, name in patterns:
if kind in ('glob', 're', 'relglob', 'relre'):
return True