# HG changeset patch # User Yuya Nishihara # Date 2018-07-21 06:52:26 # Node ID 7e7e2b2ff284f00a40b3599ac510b5f97eb9d0a8 # Parent 3ea6ce609747a500e6d56190063688ee25bd1fd8 fileset: add stub for weight-based optimization The main purpose of this change is to group basic patterns, which can be mapped to a plain matcher. I'm not so interested in a weight of each function. diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -902,6 +902,7 @@ def debugfileset(ui, repo, expr, **opts) stages = [ ('parsed', pycompat.identity), ('analyzed', filesetlang.analyze), + ('optimized', filesetlang.optimize), ] stagenames = set(n for n, f in stages) diff --git a/mercurial/fileset.py b/mercurial/fileset.py --- a/mercurial/fileset.py +++ b/mercurial/fileset.py @@ -524,6 +524,7 @@ def match(ctx, expr, badfn=None): """Create a matcher for a single fileset expression""" tree = filesetlang.parse(expr) tree = filesetlang.analyze(tree) + tree = filesetlang.optimize(tree) mctx = matchctx(ctx, _buildstatus(ctx, tree), badfn=badfn) return getmatch(mctx, tree) diff --git a/mercurial/filesetlang.py b/mercurial/filesetlang.py --- a/mercurial/filesetlang.py +++ b/mercurial/filesetlang.py @@ -164,12 +164,50 @@ def _analyze(x): def analyze(x): """Transform raw parsed tree to evaluatable tree which can be fed to - getmatch() + optimize() or getmatch() All pseudo operations should be mapped to real operations or functions defined in methods or symbols table respectively. """ return _analyze(x) +def _optimize(x): + if x is None: + return 0, x + + op = x[0] + if op in {'string', 'symbol'}: + return 0.5, x + if op == 'kindpat': + w, t = _optimize(x[2]) + return w, (op, x[1], t) + if op == 'not': + w, t = _optimize(x[1]) + return w, (op, t) + if op in {'and', 'minus'}: + wa, ta = _optimize(x[1]) + wb, tb = _optimize(x[2]) + return max(wa, wb), (op, ta, tb) + if op == 'or': + ws, ts = zip(*(_optimize(y) for y in x[1:])) + return max(ws), (op,) + ts + if op == 'list': + ws, ts = zip(*(_optimize(y) for y in x[1:])) + return sum(ws), (op,) + ts + if op == 'func': + f = getsymbol(x[1]) + w = getattr(symbols.get(f), '_weight', 1) + wa, ta = _optimize(x[2]) + return w + wa, (op, x[1], ta) + raise error.ProgrammingError('invalid operator %r' % op) + +def optimize(x): + """Reorder/rewrite evaluatable tree for optimization + + All pseudo operations should be transformed beforehand. + """ + _w, t = _optimize(x) + return t + def prettyformat(tree): return parser.prettyformat(tree, ('string', 'symbol')) diff --git a/mercurial/minifileset.py b/mercurial/minifileset.py --- a/mercurial/minifileset.py +++ b/mercurial/minifileset.py @@ -86,4 +86,5 @@ def compile(text): """ tree = filesetlang.parse(text) tree = filesetlang.analyze(tree) + tree = filesetlang.optimize(tree) return _compile(tree) diff --git a/mercurial/registrar.py b/mercurial/registrar.py --- a/mercurial/registrar.py +++ b/mercurial/registrar.py @@ -247,6 +247,9 @@ class filesetpredicate(_funcregistrarbas implies 'matchctx.status()' at runtime or not (False, by default). + Optional argument 'weight' indicates the estimated run-time cost, useful + for static optimization, default is 1. Higher weight means more expensive. + 'filesetpredicate' instance in example above can be used to decorate multiple functions. @@ -259,8 +262,9 @@ class filesetpredicate(_funcregistrarbas _getname = _funcregistrarbase._parsefuncdecl _docformat = "``%s``\n %s" - def _extrasetup(self, name, func, callstatus=False): + def _extrasetup(self, name, func, callstatus=False, weight=1): func._callstatus = callstatus + func._weight = weight class _templateregistrarbase(_funcregistrarbase): """Base of decorator to register functions as template specific one diff --git a/tests/test-fileset.t b/tests/test-fileset.t --- a/tests/test-fileset.t +++ b/tests/test-fileset.t @@ -180,6 +180,17 @@ Show parsed tree at stages: (func (symbol 'clean') None))) + * optimized: + (or + (symbol 'a1') + (symbol 'a2') + (and + (func + (symbol 'grep') + (string 'b')) + (func + (symbol 'clean') + None))) * matcher: ,