From 4999ce9c092172e9f3b73cfaa1a6ef1832ed8b03 2020-05-11 19:15:55 From: Matthias Bussonnier Date: 2020-05-11 19:15:55 Subject: [PATCH] Merge pull request #12282 from Carreau/completer-typing --- diff --git a/.travis.yml b/.travis.yml index a049cf4..8441023 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,6 +40,7 @@ install: - pip install trio curio --upgrade --upgrade-strategy eager - pip install pytest 'matplotlib !=3.2.0' mypy - pip install codecov check-manifest --upgrade + - pip install mypy script: - check-manifest @@ -50,7 +51,8 @@ script: fi - cd /tmp && iptest --coverage xml && cd - - pytest IPython - - mypy --ignore-missing-imports -m IPython.terminal.ptutils + - mypy IPython/terminal/ptutils.py + - mypy IPython/core/c*.py # On the latest Python (on Linux) only, make sure that the docs build. - | if [[ "$TRAVIS_PYTHON_VERSION" == "3.7" ]] && [[ "$TRAVIS_OS_NAME" == "linux" ]]; then diff --git a/IPython/core/completer.py b/IPython/core/completer.py index 9fd3598..ee165c5 100644 --- a/IPython/core/completer.py +++ b/IPython/core/completer.py @@ -126,7 +126,7 @@ import warnings from contextlib import contextmanager from importlib import import_module from types import SimpleNamespace -from typing import Iterable, Iterator, List, Tuple +from typing import Iterable, Iterator, List, Tuple, Union, Any, Sequence, Dict, NamedTuple, Pattern, Optional from IPython.core.error import TryNext from IPython.core.inputtransformer2 import ESC_MAGIC @@ -156,6 +156,14 @@ except ImportError: # Globals #----------------------------------------------------------------------------- +# ranges where we have most of the valid unicode names. We could be more finer +# grained but is it worth it for performace While unicode have character in the +# rage 0, 0x110000, we seem to have name for about 10% of those. (131808 as I +# write this). With below range we cover them all, with a density of ~67% +# biggest next gap we consider only adds up about 1% density and there are 600 +# gaps that would need hard coding. +_UNICODE_RANGES = [(32, 0x2fa1e), (0xe0001, 0xe01f0)] + # Public API __all__ = ['Completer','IPCompleter'] @@ -745,7 +753,7 @@ def get__all__entries(obj): return [w for w in words if isinstance(w, str)] -def match_dict_keys(keys: List[str], prefix: str, delims: str): +def match_dict_keys(keys: List[Union[str, bytes]], prefix: str, delims: str) -> Tuple[str, int, List[str]]: """Used by dict_key_matches, matching the prefix to a list of keys Parameters @@ -766,22 +774,25 @@ def match_dict_keys(keys: List[str], prefix: str, delims: str): ``matches`` a list of replacement/completion """ + keys = [k for k in keys if isinstance(k, (str, bytes))] if not prefix: - return None, 0, [repr(k) for k in keys + return '', 0, [repr(k) for k in keys if isinstance(k, (str, bytes))] quote_match = re.search('["\']', prefix) + assert quote_match is not None # silence mypy quote = quote_match.group() try: prefix_str = eval(prefix + quote, {}) except Exception: - return None, 0, [] + return '', 0, [] pattern = '[^' + ''.join('\\' + c for c in delims) + ']*$' token_match = re.search(pattern, prefix, re.UNICODE) + assert token_match is not None # silence mypy token_start = token_match.start() token_prefix = token_match.group() - matched = [] + matched:List[str] = [] for key in keys: try: if not key.startswith(prefix_str): @@ -794,14 +805,6 @@ def match_dict_keys(keys: List[str], prefix: str, delims: str): rem = key[len(prefix_str):] # force repr wrapped in ' rem_repr = repr(rem + '"') if isinstance(rem, str) else repr(rem + b'"') - if rem_repr.startswith('u') and prefix[0] not in 'uU': - # Found key is unicode, but prefix is Py2 string. - # Therefore attempt to interpret key as string. - try: - rem_repr = repr(rem.encode('ascii') + '"') - except UnicodeEncodeError: - continue - rem_repr = rem_repr[1 + rem_repr.index("'"):-2] if quote == '"': # The entered prefix is quoted with ", @@ -887,9 +890,8 @@ def _safe_isinstance(obj, module, class_name): return (module in sys.modules and isinstance(obj, getattr(import_module(module), class_name))) - -def back_unicode_name_matches(text): - u"""Match unicode characters back to unicode name +def back_unicode_name_matches(text:str) -> Tuple[str, Sequence[str]]: + """Match Unicode characters back to Unicode name This does ``☃`` -> ``\\snowman`` @@ -898,52 +900,60 @@ def back_unicode_name_matches(text): This will not either back-complete standard sequences like \\n, \\b ... - Used on Python 3 only. + Returns + ======= + + Return a tuple with two elements: + + - The Unicode character that was matched (preceded with a backslash), or + empty string, + - a sequence (of 1), name for the match Unicode character, preceded by + backslash, or empty if no match. + """ if len(text)<2: - return u'', () + return '', () maybe_slash = text[-2] if maybe_slash != '\\': - return u'', () + return '', () char = text[-1] # no expand on quote for completion in strings. # nor backcomplete standard ascii keys - if char in string.ascii_letters or char in ['"',"'"]: - return u'', () + if char in string.ascii_letters or char in ('"',"'"): + return '', () try : unic = unicodedata.name(char) - return '\\'+char,['\\'+unic] + return '\\'+char,('\\'+unic,) except KeyError: pass - return u'', () + return '', () -def back_latex_name_matches(text:str): +def back_latex_name_matches(text:str) -> Tuple[str, Sequence[str]] : """Match latex characters back to unicode name This does ``\\ℵ`` -> ``\\aleph`` - Used on Python 3 only. """ if len(text)<2: - return u'', () + return '', () maybe_slash = text[-2] if maybe_slash != '\\': - return u'', () + return '', () char = text[-1] # no expand on quote for completion in strings. # nor backcomplete standard ascii keys - if char in string.ascii_letters or char in ['"',"'"]: - return u'', () + if char in string.ascii_letters or char in ('"',"'"): + return '', () try : latex = reverse_latex_symbol[char] # '\\' replace the \ as well return '\\'+char,[latex] except KeyError: pass - return u'', () + return '', () def _formatparamchildren(parameter) -> str: @@ -1002,9 +1012,19 @@ def _make_signature(completion)-> str: return '(%s)'% ', '.join([f for f in (_formatparamchildren(p) for signature in completion.get_signatures() for p in signature.defined_names()) if f]) + +class _CompleteResult(NamedTuple): + matched_text : str + matches: Sequence[str] + matches_origin: Sequence[str] + jedi_matches: Any + + class IPCompleter(Completer): """Extension of the completer class with IPython-specific features""" + __dict_key_regexps: Optional[Dict[bool,Pattern]] = None + @observe('greedy') def _greedy_changed(self, change): """update the splitter and readline delims when greedy is changed""" @@ -1143,7 +1163,7 @@ class IPCompleter(Completer): self._unicode_names = None @property - def matchers(self): + def matchers(self) -> List[Any]: """All active matcher routines for completion""" if self.dict_keys_only: return [self.dict_key_matches] @@ -1165,7 +1185,7 @@ class IPCompleter(Completer): self.dict_key_matches, ] - def all_completions(self, text) -> List[str]: + def all_completions(self, text:str) -> List[str]: """ Wrapper around the completion methods for the benefit of emacs. """ @@ -1176,14 +1196,14 @@ class IPCompleter(Completer): return self.complete(text)[1] - def _clean_glob(self, text): + def _clean_glob(self, text:str): return self.glob("%s*" % text) - def _clean_glob_win32(self,text): + def _clean_glob_win32(self, text:str): return [f.replace("\\","/") for f in self.glob("%s*" % text)] - def file_matches(self, text): + def file_matches(self, text:str)->List[str]: """Match filenames, expanding ~USER type strings. Most of the seemingly convoluted logic in this completer is an @@ -1265,7 +1285,7 @@ class IPCompleter(Completer): # Mark directories in input list by appending '/' to their names. return [x+'/' if os.path.isdir(x) else x for x in matches] - def magic_matches(self, text): + def magic_matches(self, text:str): """Match magics""" # Get all shell magics now rather than statically, so magics loaded at # runtime show up too. @@ -1356,7 +1376,7 @@ class IPCompleter(Completer): if color.startswith(prefix) ] return [] - def _jedi_matches(self, cursor_column:int, cursor_line:int, text:str): + def _jedi_matches(self, cursor_column:int, cursor_line:int, text:str) -> Iterable[Any]: """ Return a list of :any:`jedi.api.Completions` object from a ``text`` and @@ -1430,7 +1450,7 @@ class IPCompleter(Completer): else: return [] - def python_matches(self, text): + def python_matches(self, text:str)->List[str]: """Match attributes or global python names""" if "." in text: try: @@ -1512,7 +1532,7 @@ class IPCompleter(Completer): return list(set(ret)) - def python_func_kw_matches(self,text): + def python_func_kw_matches(self, text): """Match named parameters (kwargs) of the last open function""" if "." in text: # a parameter cannot be dotted @@ -1582,36 +1602,39 @@ class IPCompleter(Completer): # Remove used named arguments from the list, no need to show twice for namedArg in set(namedArgs) - usedNamedArgs: if namedArg.startswith(text): - argMatches.append(u"%s=" %namedArg) + argMatches.append("%s=" %namedArg) except: pass return argMatches - def dict_key_matches(self, text): + @staticmethod + def _get_keys(obj: Any) -> List[Any]: + # Objects can define their own completions by defining an + # _ipy_key_completions_() method. + method = get_real_method(obj, '_ipython_key_completions_') + if method is not None: + return method() + + # Special case some common in-memory dict-like types + if isinstance(obj, dict) or\ + _safe_isinstance(obj, 'pandas', 'DataFrame'): + try: + return list(obj.keys()) + except Exception: + return [] + elif _safe_isinstance(obj, 'numpy', 'ndarray') or\ + _safe_isinstance(obj, 'numpy', 'void'): + return obj.dtype.names or [] + return [] + + def dict_key_matches(self, text:str) -> List[str]: "Match string keys in a dictionary, after e.g. 'foo[' " - def get_keys(obj): - # Objects can define their own completions by defining an - # _ipy_key_completions_() method. - method = get_real_method(obj, '_ipython_key_completions_') - if method is not None: - return method() - - # Special case some common in-memory dict-like types - if isinstance(obj, dict) or\ - _safe_isinstance(obj, 'pandas', 'DataFrame'): - try: - return list(obj.keys()) - except Exception: - return [] - elif _safe_isinstance(obj, 'numpy', 'ndarray') or\ - _safe_isinstance(obj, 'numpy', 'void'): - return obj.dtype.names or [] - return [] - try: + + if self.__dict_key_regexps is not None: regexps = self.__dict_key_regexps - except AttributeError: + else: dict_key_re_fmt = r'''(?x) ( # match dict-referring expression wrt greedy setting %s @@ -1651,7 +1674,7 @@ class IPCompleter(Completer): except Exception: return [] - keys = get_keys(obj) + keys = self._get_keys(obj) if not keys: return keys closing_quote, token_offset, matches = match_dict_keys(keys, prefix, self.splitter.delims) @@ -1696,16 +1719,15 @@ class IPCompleter(Completer): return [leading + k + suf for k in matches] - def unicode_name_matches(self, text): - u"""Match Latex-like syntax for unicode characters base + @staticmethod + def unicode_name_matches(text:str) -> Tuple[str, List[str]] : + """Match Latex-like syntax for unicode characters base on the name of the character. This does ``\\GREEK SMALL LETTER ETA`` -> ``η`` Works only on valid python 3 identifier, or on combining characters that will combine to form a valid identifier. - - Used on Python 3 only. """ slashpos = text.rfind('\\') if slashpos > -1: @@ -1717,11 +1739,11 @@ class IPCompleter(Completer): return '\\'+s,[unic] except KeyError: pass - return u'', [] + return '', [] - def latex_matches(self, text): - u"""Match Latex syntax for unicode characters. + def latex_matches(self, text:str) -> Tuple[str, Sequence[str]]: + """Match Latex syntax for unicode characters. This does both ``\\alp`` -> ``\\alpha`` and ``\\alpha`` -> ``α`` """ @@ -1738,7 +1760,7 @@ class IPCompleter(Completer): matches = [k for k in latex_symbols if k.startswith(s)] if matches: return s, matches - return u'', [] + return '', () def dispatch_custom_completer(self, text): if not self.custom_completers: @@ -1839,6 +1861,7 @@ class IPCompleter(Completer): category=ProvisionalCompleterWarning, stacklevel=2) seen = set() + profiler:Optional[cProfile.Profile] try: if self.profile_completions: import cProfile @@ -1864,7 +1887,7 @@ class IPCompleter(Completer): print("Writing profiler output to", output_path) profiler.dump_stats(output_path) - def _completions(self, full_text: str, offset: int, *, _timeout)->Iterator[Completion]: + def _completions(self, full_text: str, offset: int, *, _timeout) -> Iterator[Completion]: """ Core completion module.Same signature as :any:`completions`, with the extra `timeout` parameter (in seconds). @@ -1949,7 +1972,7 @@ class IPCompleter(Completer): yield Completion(start=start_offset, end=offset, text=m, _origin=t, signature='', type='') - def complete(self, text=None, line_buffer=None, cursor_pos=None): + def complete(self, text=None, line_buffer=None, cursor_pos=None) -> Tuple[str, Sequence[str]]: """Find completions for the given text and line context. Note that both the text and the line_buffer are optional, but at least @@ -1973,9 +1996,9 @@ class IPCompleter(Completer): Returns ------- + Tuple of two items: text : str Text that was actually used in the completion. - matches : list A list of completion matches. @@ -1995,7 +2018,7 @@ class IPCompleter(Completer): return self._complete(line_buffer=line_buffer, cursor_pos=cursor_pos, text=text, cursor_line=0)[:2] def _complete(self, *, cursor_line, cursor_pos, line_buffer=None, text=None, - full_text=None) -> Tuple[str, List[str], List[str], Iterable[_FakeJediCompletion]]: + full_text=None) -> _CompleteResult: """ Like complete but can also returns raw jedi completions as well as the @@ -2008,8 +2031,19 @@ class IPCompleter(Completer): caller) as the offset in the ``text`` or ``line_buffer``, or as the ``column`` when passing multiline strings this could/should be renamed but would add extra noise. + + Return + ====== + + A tuple of N elements which are (likely): + + matched_text: ? the text that the complete matched + matches: list of completions ? + matches_origin: ? list same lenght as matches, and where each completion came from + jedi_matches: list of Jedi matches, have it's own structure. """ + # if the cursor position isn't given, the only sane assumption we can # make is that it's at the end of the line (the common case) if cursor_pos is None: @@ -2027,17 +2061,16 @@ class IPCompleter(Completer): if self.backslash_combining_completions: # allow deactivation of these on windows. base_text = text if not line_buffer else line_buffer[:cursor_pos] - latex_text, latex_matches = self.latex_matches(base_text) - if latex_matches: - return latex_text, latex_matches, ['latex_matches']*len(latex_matches), () - name_text = '' - name_matches = [] - # need to add self.fwd_unicode_match() function here when done - for meth in (self.unicode_name_matches, back_latex_name_matches, back_unicode_name_matches, self.fwd_unicode_match): + + for meth in (self.latex_matches, + self.unicode_name_matches, + back_latex_name_matches, + back_unicode_name_matches, + self.fwd_unicode_match): name_text, name_matches = meth(base_text) if name_text: - return name_text, name_matches[:MATCHES_LIMIT], \ - [meth.__qualname__]*min(len(name_matches), MATCHES_LIMIT), () + return _CompleteResult(name_text, name_matches[:MATCHES_LIMIT], \ + [meth.__qualname__]*min(len(name_matches), MATCHES_LIMIT), ()) # If no line buffer is given, assume the input text is all there was @@ -2052,7 +2085,7 @@ class IPCompleter(Completer): matches = list(matcher(line_buffer))[:MATCHES_LIMIT] if matches: origins = [matcher.__qualname__] * len(matches) - return text, matches, origins, () + return _CompleteResult(text, matches, origins, ()) # Start with a clean slate of completions matches = [] @@ -2061,13 +2094,13 @@ class IPCompleter(Completer): # different types of objects. The rlcomplete() method could then # simply collapse the dict into a list for readline, but we'd have # richer completion semantics in other environments. - completions = () + completions:Iterable[Any] = [] if self.use_jedi: if not full_text: full_text = line_buffer completions = self._jedi_matches( cursor_pos, cursor_line, full_text) - + if self.merge_completions: matches = [] for matcher in self.matchers: @@ -2105,9 +2138,38 @@ class IPCompleter(Completer): self.matches = _matches - return text, _matches, origins, completions + return _CompleteResult(text, _matches, origins, completions) - def fwd_unicode_match(self, text:str) -> Tuple[str, list]: + def fwd_unicode_match(self, text:str) -> Tuple[str, Sequence[str]]: + """ + + Forward match a string starting with a backslash with a list of + potential Unicode completions. + + Will compute list list of Unicode character names on first call and cache it. + + Return + ====== + + At tuple with: + - matched text (empty if no matches) + - list of potential completions, empty tuple otherwise) + """ + # TODO: self.unicode_names is here a list we traverse each time with ~100k elements. + # We could do a faster match using a Trie. + + # Using pygtrie the follwing seem to work: + + # s = PrefixSet() + + # for c in range(0,0x10FFFF + 1): + # try: + # s.add(unicodedata.name(chr(c))) + # except ValueError: + # pass + # [''.join(k) for k in s.iter(prefix)] + + # But need to be timed and adds an extra dependency. slashpos = text.rfind('\\') # if text starts with slash @@ -2126,7 +2188,7 @@ class IPCompleter(Completer): # if text does not start with slash else: - return u'', () + return '', () @property def unicode_names(self) -> List[str]: @@ -2141,6 +2203,16 @@ class IPCompleter(Completer): names.append(unicodedata.name(chr(c))) except ValueError: pass - self._unicode_names = names + self._unicode_names = _unicode_name_compute(_UNICODE_RANGES) return self._unicode_names + +def _unicode_name_compute(ranges:List[Tuple[int,int]]) -> List[str]: + names = [] + for start,stop in ranges: + for c in range(start, stop) : + try: + names.append(unicodedata.name(chr(c))) + except ValueError: + pass + return names diff --git a/IPython/core/tests/test_completer.py b/IPython/core/tests/test_completer.py index 2c19e2e..413fc4f 100644 --- a/IPython/core/tests/test_completer.py +++ b/IPython/core/tests/test_completer.py @@ -33,6 +33,25 @@ from nose.tools import assert_in, assert_not_in # Test functions # ----------------------------------------------------------------------------- +def test_unicode_range(): + """ + Test that the ranges we test for unicode names give the same number of + results than testing the full length. + """ + from IPython.core.completer import _unicode_name_compute, _UNICODE_RANGES + + expected_list = _unicode_name_compute([(0, 0x110000)]) + test = _unicode_name_compute(_UNICODE_RANGES) + len_exp = len(expected_list) + len_test = len(test) + + # do not inline the len() or on error pytest will try to print the 130 000 + + # elements. + assert len_exp == len_test + + # fail if new unicode symbols have been added. + assert len_exp <= 131808 + @contextmanager def greedy_completion(): @@ -212,9 +231,8 @@ class TestCompleter(unittest.TestCase): keys = random.sample(latex_symbols.keys(), 10) for k in keys: text, matches = ip.complete(k) - nt.assert_equal(len(matches), 1) nt.assert_equal(text, k) - nt.assert_equal(matches[0], latex_symbols[k]) + nt.assert_equal(matches, [latex_symbols[k]]) # Test a more complex line text, matches = ip.complete("print(\\alpha") nt.assert_equal(text, "\\alpha") @@ -231,7 +249,7 @@ class TestCompleter(unittest.TestCase): ip = get_ipython() text, matches = ip.Completer.latex_matches("\\really_i_should_match_nothing") nt.assert_equal(text, "") - nt.assert_equal(matches, []) + nt.assert_equal(matches, ()) def test_back_latex_completion(self): ip = get_ipython() @@ -244,14 +262,14 @@ class TestCompleter(unittest.TestCase): ip = get_ipython() name, matches = ip.complete("\\Ⅴ") - nt.assert_equal(matches, ["\\ROMAN NUMERAL FIVE"]) + nt.assert_equal(matches, ("\\ROMAN NUMERAL FIVE",)) def test_forward_unicode_completion(self): ip = get_ipython() name, matches = ip.complete("\\ROMAN NUMERAL FIVE") - nt.assert_equal(len(matches), 1) - nt.assert_equal(matches[0], "Ⅴ") + nt.assert_equal(matches, ["Ⅴ"] ) # This is not a V + nt.assert_equal(matches, ["\u2164"] ) # same as above but explicit. @nt.nottest # now we have a completion for \jmath @decorators.knownfailureif( diff --git a/MANIFEST.in b/MANIFEST.in index d47a16d..5bc59b9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,7 @@ include setupbase.py include setupegg.py include MANIFEST.in include pytest.ini +include mypy.ini include .mailmap recursive-exclude tools * diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..998aed5 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,4 @@ +[mypy] +python_version = 3.6 +ignore_missing_imports = True +follow_imports = silent