diff --git a/mercurial/minifileset.py b/mercurial/minifileset.py
new file mode 100644
--- /dev/null
+++ b/mercurial/minifileset.py
@@ -0,0 +1,106 @@
+# minifileset.py - a simple language to select files
+#
+# Copyright 2017 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from .i18n import _
+from . import (
+    error,
+    fileset,
+)
+
+def _compile(tree):
+    """compile a parsed fileset tree into a function (path, size) -> bool
+
+    Sub-expressions are compiled eagerly, so an error raised while compiling
+    them surfaces here instead of on the first call of the returned function.
+    """
+    if not tree:
+        raise error.ParseError(_("missing argument"))
+    op = tree[0]
+    if op == 'symbol':
+        name = fileset.getstring(tree, _('invalid file pattern'))
+        if name.startswith('**'): # file extension test, ex. "**.tar.gz"
+            ext = name[2:]
+            for c in ext:
+                if c in '*{}[]?/\\':
+                    raise error.ParseError(_('reserved character: %s') % c)
+            return lambda n, s: n.endswith(ext)
+        raise error.ParseError(_('invalid symbol: %s') % name)
+    elif op == 'string':
+        # TODO: teach fileset about 'path:', so that this can be a symbol and
+        # not require quoting.
+        name = fileset.getstring(tree, _('invalid path literal'))
+        if name.startswith('path:'): # directory or full path test
+            p = name[5:] # prefix
+            pl = len(p)
+            # match "p" itself or anything below it, but not a sibling that
+            # merely shares the prefix (ex. "bin2/abc" for "path:bin")
+            f = lambda n, s: n.startswith(p) and (len(n) == pl or n[pl] == '/')
+            return f
+        raise error.ParseError(_("invalid string"),
+                               hint=_('paths must be prefixed with "path:"'))
+    elif op == 'or':
+        func1 = _compile(tree[1])
+        func2 = _compile(tree[2])
+        return lambda n, s: func1(n, s) or func2(n, s)
+    elif op == 'and':
+        func1 = _compile(tree[1])
+        func2 = _compile(tree[2])
+        return lambda n, s: func1(n, s) and func2(n, s)
+    elif op == 'not':
+        # compile the operand once, not on every call of the matcher
+        func1 = _compile(tree[1])
+        return lambda n, s: not func1(n, s)
+    elif op == 'group':
+        return _compile(tree[1])
+    elif op == 'func':
+        def _size():
+            # build the size matcher once, not on every call
+            sizep = fileset.sizematcher(tree[2])
+            return lambda n, s: sizep(s)
+
+        # predicate name -> factory returning the compiled predicate
+        symbols = {
+            'all': lambda: lambda n, s: True,
+            'none': lambda: lambda n, s: False,
+            'size': _size,
+        }
+
+        x = tree[1]
+        name = x[1]
+        if x[0] == 'symbol' and name in symbols:
+            return symbols[name]()
+
+        raise error.UnknownIdentifier(name, symbols.keys())
+    elif op == 'minus': # equivalent to 'x and not y'
+        func1 = _compile(tree[1])
+        func2 = _compile(tree[2])
+        return lambda n, s: func1(n, s) and not func2(n, s)
+    elif op == 'negate':
+        raise error.ParseError(_("can't use negate operator in this context"))
+    elif op == 'list':
+        raise error.ParseError(_("can't use a list in this context"),
+                               hint=_('see hg help "filesets.x or y"'))
+    raise error.ProgrammingError('illegal tree: %r' % (tree,))
+
+def compile(text):
+    """generate a function (path, size) -> bool from filter specification.
+
+    "text" could contain the operators defined by the fileset language for
+    common logic operations, and parenthesis for grouping. The supported path
+    tests are '**.extname' for file extension test, and '"path:dir/subdir"'
+    for prefix test. The ``size()`` predicate is borrowed from filesets to test
+    file size. The predicates ``all()`` and ``none()`` are also supported.
+
+    '(**.php & size(">10MB")) | **.zip | ("path:bin" & !"path:bin/README")' for
+    example, will catch all php files whose size is greater than 10 MB, all
+    files whose name ends with ".zip", and all files under "bin" in the repo
+    root except for "bin/README".
+    """
+    tree = fileset.parse(text)
+    return _compile(tree)
diff --git a/tests/test-minifileset.py b/tests/test-minifileset.py
new file mode 100644
--- /dev/null
+++ b/tests/test-minifileset.py
@@ -0,0 +1,57 @@
+from __future__ import absolute_import
+from __future__ import print_function
+
+import os
+import sys
+
+# make it runnable directly without run-tests.py
+sys.path[0:0] = [os.path.join(os.path.dirname(__file__), '..')]
+
+from mercurial import (
+    error,
+    minifileset,
+)
+
+def check(text, truecases, falsecases):
+    """print a message for each case the compiled "text" gets wrong
+
+    "truecases" and "falsecases" are lists of (path, size) tuples that the
+    compiled function must accept and reject respectively.
+    """
+    f = minifileset.compile(text)
+    for args in truecases:
+        if not f(*args):
+            print('unexpected: %r should include %r' % (text, args))
+    for args in falsecases:
+        if f(*args):
+            print('unexpected: %r should exclude %r' % (text, args))
+
+def checkerror(text):
+    """print a message unless compiling "text" raises ParseError"""
+    try:
+        minifileset.compile(text)
+    except error.ParseError:
+        return
+    print('unexpected: %r should fail to compile' % text)
+
+check('all()', [('a.php', 123), ('b.txt', 0)], [])
+check('none()', [], [('a.php', 123), ('b.txt', 0)])
+check('!!!!((!(!!all())))', [], [('a.php', 123), ('b.txt', 0)])
+
+check('"path:a" & (**.b | **.c)', [('a/b.b', 0), ('a/c.c', 0)], [('b/c.c', 0)])
+check('("path:a" & **.b) | **.c',
+      [('a/b.b', 0), ('a/c.c', 0), ('b/c.c', 0)], [])
+
+check('**.bin - size("<20B")', [('b.bin', 21)], [('a.bin', 11), ('b.txt', 21)])
+
+check('!!**.bin or size(">20B") + "path:bin" or !size(">10")',
+      [('a.bin', 11), ('b.txt', 21), ('bin/abc', 11)],
+      [('a.notbin', 11), ('b.txt', 11), ('bin2/abc', 11)])
+
+check('(**.php and size(">10KB")) | **.zip | ("path:bin" & !"path:bin/README") '
+      ' | size(">1M")',
+      [('a.php', 15000), ('a.zip', 0), ('bin/a', 0), ('bin/README', 1e7)],
+      [('a.php', 5000), ('b.zip2', 0), ('t/bin/a', 0), ('bin/README', 1)])
+
+# the operand of "not" must be validated when compiling, not when matching
+checkerror('!foo')