##// END OF EJS Templates
match: match explicit file using a set...
match: match explicit file using a set. The matcher has all the logic to do quick comparisons against explicit patterns, however the pattern matcher was shadowing the code using that set and used the compiled regex pattern in all cases, which is quite slow. We restore the usage of the set-based matching to boost performance. Building the regexp is still consuming a large amount of time (actually, the majority of the time), which is still silly. Maybe using re2 would help that, but this is a quest for another adventure. Another path to improve this is to have a pattern type dedicated to match the exact path to a file only (not a directory). This pattern could use the set matching only and be skipped in the regex altogether. Benchmarks ========== In the following benchmark we are comparing the `hg cat` and `hg files` run time when matching against all files in the repository. They are run: - without the rust extensions - with the standard python engine (so without re2) Performance improvement in this series -------------------------------------- ###### hg files ############################################################### ### mercurial-2018-08-01-zstd-sparse-revlog ### sorted base-changeset: 0.230092 seconds prev-changeset: 0.230069 seconds this-changeset: 0.211425 seconds (-8.36%) ### mercurial-2018-08-01-zstd-sparse-revlog ### shuffled base-changeset: 0.234235 seconds prev-changeset: 0.231165 seconds (-1.38%) this-changeset: 0.212300 seconds (-9.43%) ### pypy-2018-08-01-zstd-sparse-revlog ### sorted base-changeset: 0.613567 seconds prev-changeset: 0.616799 seconds this-changeset: 0.510852 seconds (-16.82%) ### pypy-2018-08-01-zstd-sparse-revlog ### shuffled base-changeset: 0.801880 seconds prev-changeset: 0.616393 seconds (-23.22%) this-changeset: 0.511903 seconds (-36.23%) ### netbeans-2018-08-01-zstd-sparse-revlog ### sorted base-changeset: 21.541828 seconds prev-changeset: 21.586773 seconds this-changeset: 13.648347 seconds (-36.76%) ### 
netbeans-2018-08-01-zstd-sparse-revlog ### shuffled base-changeset: 172.759857 seconds prev-changeset: 21.908197 seconds (-87.32%) this-changeset: 13.945110 seconds (-91.93%) ### mozilla-central-2018-08-01-zstd-sparse-revlog ### sorted base-changeset: 62.474221 seconds prev-changeset: 61.279490 seconds (-1.22%) this-changeset: 29.529469 seconds (-52.40%) ### mozilla-central-2018-08-01-zstd-sparse-revlog ### shuffled base-changeset: 1364.180218 seconds prev-changeset: 62.473549 seconds (-95.40%) this-changeset: 30.625249 seconds (-97.75%) ###### hg cat ################################################################# ### mercurial-2018-08-01-zstd-sparse-revlog ### sorted base-changeset: 0.764407 seconds prev-changeset: 0.763883 seconds this-changeset: 0.737326 seconds (-3.68%) ### mercurial-2018-08-01-zstd-sparse-revlog ### shuffled base-changeset: 0.768924 seconds prev-changeset: 0.765848 seconds this-changeset: 0.174d0b seconds (-4.44%) ### pypy-2018-08-01-zstd-sparse-revlog ### sorted base-changeset: 2.065220 seconds prev-changeset: 2.070498 seconds this-changeset: 1.939482 seconds (-6.08%) ### pypy-2018-08-01-zstd-sparse-revlog ### shuffled base-changeset: 2.276388 seconds prev-changeset: 2.069197 seconds (-9.15%) this-changeset: 1.931746 seconds (-15.19%) ### netbeans-2018-08-01-zstd-sparse-revlog ### sorted base-changeset: 40.967983 seconds prev-changeset: 41.392423 seconds this-changeset: 32.181681 seconds (-22.20%) ### netbeans-2018-08-01-zstd-sparse-revlog ### shuffled base-changeset: 216.388709 seconds prev-changeset: 41.648689 seconds (-80.88%) this-changeset: 32.580817 seconds (-85.04%) ### mozilla-central-2018-08-01-zstd-sparse-revlog ### sorted base-changeset: 105.228510 seconds prev-changeset: 103.315670 seconds (-1.23%) this-changeset: 69.416118 seconds (-33.64%) ### mozilla-central-2018-08-01-zstd-sparse-revlog ### shuffled base-changeset: 1448.722784 seconds prev-changeset: 104.369358 seconds (-92.80%) this-changeset: 70.554789 seconds (-95.13%) 
Different way to list the same data with this revision ------------------------------------------------------ ###### hg files ############################################################### ### mercurial-2018-08-01-zstd-sparse-revlog root: 0.119182 seconds glob: 0.120697 seconds (+1.27%) sorted: 0.211425 seconds (+77.40%) shuffled: 0.212300 seconds (+78.13%) ### pypy-2018-08-01-zstd-sparse-revlog root: 0.121986 seconds glob: 0.124822 seconds (+2.32%) sorted: 0.510852 seconds (+318.78%) shuffled: 0.511903 seconds (+319.64%) ### netbeans-2018-08-01-zstd-sparse-revlog root: 0.173984 seconds glob: 0.227203 seconds (+30.59%) sorted: 13.648347 seconds (+7744.59%) shuffled: 13.945110 seconds (+7915.16%) ### mozilla-central-2018-08-01-zstd-sparse-revlog root: 0.366463 seconds glob: 0.491030 seconds (+33.99%) sorted: 29.529469 seconds (+7957.96%) shuffled: 30.625249 seconds (+8256.97%) ###### hg cat ################################################################# ### mercurial-2018-08-01-zstd-sparse-revlog glob: 0.647471 seconds root: 0.643120 seconds shuffled: 0.174d0b seconds (+13.92%) sorted: 0.737326 seconds (+13.88%) ### mozilla-central-2018-08-01-zstd-sparse-revlog glob: 40.596983 seconds root: 40.129136 seconds shuffled: 70.554789 seconds (+73.79%) sorted: 69.416118 seconds (+70.99%) ### netbeans-2018-08-01-zstd-sparse-revlog glob: 18.777924 seconds root: 18.613905 seconds shuffled: 32.580817 seconds (+73.51%) sorted: 32.181681 seconds (+71.38%) ### pypy-2018-08-01-zstd-sparse-revlog glob: 1.555319 seconds root: 1.536534 seconds shuffled: 1.931746 seconds (+24.20%) sorted: 1.939482 seconds (+24.70%)

File last commit:

r49801:642e31cb default
r51286:81c7d04f stable
Show More
fancyopts.py
390 lines | 11.3 KiB | text/x-python | PythonLexer
# fancyopts.py - better command line parsing
#
# Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import abc
import functools
from .i18n import _
from . import (
error,
pycompat,
)
# Boolean flags that must never get an automatic ``--no-<flag>`` counterpart:
#
# - noninteractive: its negation would read as the confusing
#   --no-noninteractive
# - help, version: both make hg do a single thing and then exit, so they are
#   not suitable for use in things like aliases anyway
nevernegate = {b'help', b'noninteractive', b'version'}
def _earlyoptarg(arg, shortlist, namelist):
"""Check if the given arg is a valid unabbreviated option
Returns (flag_str, has_embedded_value?, embedded_value, takes_value?)
>>> def opt(arg):
... return _earlyoptarg(arg, b'R:q', [b'cwd=', b'debugger'])
long form:
>>> opt(b'--cwd')
('--cwd', False, '', True)
>>> opt(b'--cwd=')
('--cwd', True, '', True)
>>> opt(b'--cwd=foo')
('--cwd', True, 'foo', True)
>>> opt(b'--debugger')
('--debugger', False, '', False)
>>> opt(b'--debugger=') # invalid but parsable
('--debugger', True, '', False)
short form:
>>> opt(b'-R')
('-R', False, '', True)
>>> opt(b'-Rfoo')
('-R', True, 'foo', True)
>>> opt(b'-q')
('-q', False, '', False)
>>> opt(b'-qfoo') # invalid but parsable
('-q', True, 'foo', False)
unknown or invalid:
>>> opt(b'--unknown')
('', False, '', False)
>>> opt(b'-u')
('', False, '', False)
>>> opt(b'-ufoo')
('', False, '', False)
>>> opt(b'--')
('', False, '', False)
>>> opt(b'-')
('', False, '', False)
>>> opt(b'-:')
('', False, '', False)
>>> opt(b'-:foo')
('', False, '', False)
"""
if arg.startswith(b'--'):
flag, eq, val = arg.partition(b'=')
if flag[2:] in namelist:
return flag, bool(eq), val, False
if flag[2:] + b'=' in namelist:
return flag, bool(eq), val, True
elif arg.startswith(b'-') and arg != b'-' and not arg.startswith(b'-:'):
flag, val = arg[:2], arg[2:]
i = shortlist.find(flag[1:])
if i >= 0:
return flag, bool(val), val, shortlist.startswith(b':', i + 1)
return b'', False, b'', False
def earlygetopt(args, shortlist, namelist, gnu=False, keepsep=False):
    """Parse options like getopt, but skip unknown options and abbreviations

    With gnu=False, parsing stops at the first argument that is not a known
    option. With gnu=True, options and non-option arguments may be
    intermixed, and unknown options are treated as non-option arguments.

    With keepsep=True, '--' is kept in the list of remaining arguments.
    This is useful for stripping early options from a full command line.

    Returns a pair (parsed options as (flag, value) tuples, remaining args).

    >>> def get(args, gnu=False, keepsep=False):
    ...     return earlygetopt(args, b'R:q', [b'cwd=', b'debugger'],
    ...                        gnu=gnu, keepsep=keepsep)

    default parsing rules for early options:

    >>> get([b'x', b'--cwd', b'foo', b'-Rbar', b'-q', b'y'], gnu=True)
    ([('--cwd', 'foo'), ('-R', 'bar'), ('-q', '')], ['x', 'y'])
    >>> get([b'x', b'--cwd=foo', b'y', b'-R', b'bar', b'--debugger'], gnu=True)
    ([('--cwd', 'foo'), ('-R', 'bar'), ('--debugger', '')], ['x', 'y'])
    >>> get([b'--unknown', b'--cwd=foo', b'--', '--debugger'], gnu=True)
    ([('--cwd', 'foo')], ['--unknown', '--debugger'])

    restricted parsing rules (early options must come first):

    >>> get([b'--cwd', b'foo', b'-Rbar', b'x', b'-q', b'y'], gnu=False)
    ([('--cwd', 'foo'), ('-R', 'bar')], ['x', '-q', 'y'])
    >>> get([b'--cwd=foo', b'x', b'y', b'-R', b'bar', b'--debugger'], gnu=False)
    ([('--cwd', 'foo')], ['x', 'y', '-R', 'bar', '--debugger'])
    >>> get([b'--unknown', b'--cwd=foo', b'--', '--debugger'], gnu=False)
    ([], ['--unknown', '--cwd=foo', '--', '--debugger'])

    stripping early options (without losing '--'):

    >>> get([b'x', b'-Rbar', b'--', '--debugger'], gnu=True, keepsep=True)[1]
    ['x', '--', '--debugger']

    last argument:

    >>> get([b'--cwd'])
    ([], ['--cwd'])
    >>> get([b'--cwd=foo'])
    ([('--cwd', 'foo')], [])
    >>> get([b'-R'])
    ([], ['-R'])
    >>> get([b'-Rbar'])
    ([('-R', 'bar')], [])
    >>> get([b'-q'])
    ([('-q', '')], [])
    >>> get([b'-q', b'--'])
    ([('-q', '')], [])

    '--' may be a value:

    >>> get([b'-R', b'--', b'x'])
    ([('-R', '--')], ['x'])
    >>> get([b'--cwd', b'--', b'x'])
    ([('--cwd', '--')], ['x'])

    value passed to bool options:

    >>> get([b'--debugger=foo', b'x'])
    ([], ['--debugger=foo', 'x'])
    >>> get([b'-qfoo', b'x'])
    ([], ['-qfoo', 'x'])

    short option isn't separated with '=':

    >>> get([b'-R=bar'])
    ([('-R', '=bar')], [])

    ':' may be in shortlist, but shouldn't be taken as an option letter:

    >>> get([b'-:', b'y'])
    ([], ['-:', 'y'])

    '-' is a valid non-option argument:

    >>> get([b'-', b'y'])
    ([], ['-', 'y'])
    """
    opts = []
    rest = []
    total = len(args)
    idx = 0
    while idx < total:
        current = args[idx]
        if current == b'--':
            # end of options; swallow the separator unless asked to keep it
            if not keepsep:
                idx += 1
            break

        flag, hasval, val, takeval = _earlyoptarg(current, shortlist, namelist)
        # -s/--str given without an embedded value: it lives in the next arg
        wantsnext = takeval and not hasval
        if wantsnext and idx + 1 >= total:
            # the value is missing, leave the option unparsed
            break

        if not flag or (hasval and not takeval):
            # non-option argument or -b/--bool=INVALID_VALUE
            if not gnu:
                break
            rest.append(current)
            idx += 1
        elif wantsnext:
            # -s/--str VALUE
            opts.append((flag, args[idx + 1]))
            idx += 2
        else:
            # -b/--bool or -s/--str=VALUE
            opts.append((flag, val))
            idx += 1

    # whatever was not consumed is handed back untouched
    rest.extend(args[idx:])
    return opts, rest
class customopt(abc.ABC):
    """Manage defaults and mutations for any type of opt.

    This is an abstract base class: concrete option types must implement
    ``newstate()``. Deriving from ``abc.ABC`` (rather than setting the
    Python 2 style ``__metaclass__`` attribute, which Python 3 silently
    ignores) makes sure the ``@abc.abstractmethod`` marker is enforced.
    """

    def __init__(self, defaultvalue):
        # value handed out (as-is) by getdefaultvalue()
        self._defaultvalue = defaultvalue

    def _isboolopt(self):
        """Tell whether this opt is a boolean flag (False unless overridden)"""
        return False

    def getdefaultvalue(self):
        """Returns the default value for this opt.

        Subclasses should override this to return a new value if the value type
        is mutable."""
        return self._defaultvalue

    @abc.abstractmethod
    def newstate(self, oldstate, newparam, abort):
        """Adds newparam to oldstate and returns the new state.

        On failure, abort can be called with a string error message."""
class _simpleopt(customopt):
    """Opt whose state is simply replaced by the latest parameter"""

    def _isboolopt(self):
        # a default of None or True/False marks a flag taking no parameter
        default = self._defaultvalue
        return default is None or isinstance(default, bool)

    def newstate(self, oldstate, newparam, abort):
        """Discard oldstate and use newparam as the new state"""
        return newparam
class _callableopt(customopt):
    """Opt whose state is computed by calling a function on the parameter"""

    def __init__(self, callablefn):
        # the default value of such an opt is always None
        super().__init__(None)
        self.callablefn = callablefn

    def newstate(self, oldstate, newparam, abort):
        """Return callablefn(newparam); oldstate is not used"""
        convert = self.callablefn
        return convert(newparam)
class _listopt(customopt):
    """Opt accumulating every parameter it is given into a list"""

    def getdefaultvalue(self):
        # hand out a copy so that appending to the state never alters the
        # stored default
        fresh = self._defaultvalue[:]
        return fresh

    def newstate(self, oldstate, newparam, abort):
        """Append newparam to oldstate (in place) and return it"""
        oldstate.append(newparam)
        return oldstate
class _intopt(customopt):
    """Opt whose parameter is converted with int()"""

    def newstate(self, oldstate, newparam, abort):
        """Return int(newparam), calling abort if it is not a valid int"""
        try:
            parsed = int(newparam)
        except ValueError:
            abort(_(b'expected int'))
        else:
            return parsed
def _defaultopt(default):
    """Returns a default opt implementation, given a default value.

    The checks are made in order: an existing customopt is returned as-is,
    then callables, lists and ints each get their dedicated opt type, and
    anything else is wrapped into a _simpleopt.
    """
    if isinstance(default, customopt):
        return default
    if callable(default):
        return _callableopt(default)
    if isinstance(default, list):
        # copy, so the caller's list is not shared with the opt
        return _listopt(default[:])
    # exact type check on purpose: bool is a subclass of int but must be
    # handled by _simpleopt
    if type(default) is int:
        return _intopt(default)
    return _simpleopt(default)
def fancyopts(args, options, state, gnu=False, early=False, optaliases=None):
    """
    read args, parse options, and store options in state

    each option is a tuple of:

      short option or ''
      long option
      default value
      description
      option value label(optional)

    option types include:

      boolean or none - option sets variable in state to true
      string - parameter string is stored in state
      list - parameter string is added to a list
      integer - parameter strings is stored as int
      function - call function with parameter
      customopt - subclass of 'customopt'

    optaliases is a mapping from a canonical option name to a list of
    additional long options. This exists for preserving backward compatibility
    of early options. If we want to use it extensively, please consider moving
    the functionality to the options table (e.g separate long options by '|'.)

    state is updated in place: every option first gets its default value,
    then the values found in args. With early=True the arguments are parsed
    by earlygetopt(); otherwise gnu selects between pycompat.gnugetoptb and
    pycompat.getoptb.

    non-option args are returned
    """
    aliases = {} if optaliases is None else optaliases

    longnames = []  # long option spec handed to the parser
    shortspec = b''  # short option spec handed to the parser
    flag2name = {}  # b'-x' / b'--long' -> key in state
    handlers = {}  # key in state -> customopt instance
    negated = {}  # b'--no-foo' -> b'--foo' (or the other way around)
    declared = {entry[1] for entry in options}

    for entry in options:
        if len(entry) == 5:
            short, name, default, _comment, _label = entry
        else:
            short, name, default, _comment = entry

        # convert opts to getopt format
        spellings = [name] + list(aliases.get(name, []))
        key = name.replace(b'-', b'_')

        flag2name[b'-' + short] = key
        for spelling in spellings:
            flag2name[b'--' + spelling] = key

        handler = _defaultopt(default)
        handlers[key] = handler

        # copy defaults to state
        state[key] = handler.getdefaultvalue()

        if not handler._isboolopt():
            # the option takes a parameter
            if short:
                short += b':'
            spellings = [spelling + b'=' for spelling in spellings]
        elif key not in nevernegate:
            for spelling in spellings:
                if spelling.startswith(b'no-'):
                    opposite = spelling[3:]
                else:
                    opposite = b'no-' + spelling
                # backout (as a practical example) has both --commit and
                # --no-commit options, so we don't want to allow the
                # negations of those flags.
                if opposite in declared:
                    continue
                assert (b'--' + spelling) not in negated
                negated[b'--' + opposite] = b'--' + spelling
                longnames.append(opposite)

        if short:
            shortspec += short
        if key:
            longnames.extend(spellings)

    # parse arguments
    if early:
        parse = functools.partial(earlygetopt, gnu=gnu)
    elif gnu:
        parse = pycompat.gnugetoptb
    else:
        parse = pycompat.getoptb
    parsed, leftover = parse(args, shortspec, longnames)

    # transfer result to state
    for flag, value in parsed:
        canonical = negated.get(flag)
        isnegated = canonical is not None
        if isnegated:
            # report and store under the flag that was actually declared
            flag = canonical
        key = flag2name[flag]
        handler = handlers[key]
        if handler._isboolopt():
            state[key] = not isnegated
            continue

        def abort(s):
            raise error.InputError(
                _(b'invalid value %r for option %s, %s')
                % (pycompat.maybebytestr(value), flag, s)
            )

        state[key] = handler.newstate(state[key], value, abort)

    # return unparsed args
    return leftover