From 76622da725ae58bcab27c0a73b7e6427dec5e304 2020-05-10 19:15:22
From: Matthias Bussonnier <bussonniermatthias@gmail.com>
Date: 2020-05-10 19:15:22
Subject: [PATCH] speedup completion of unicode names

---

diff --git a/IPython/core/completer.py b/IPython/core/completer.py
index 78aa171..ee165c5 100644
--- a/IPython/core/completer.py
+++ b/IPython/core/completer.py
@@ -156,6 +156,14 @@ except ImportError:
 # Globals
 #-----------------------------------------------------------------------------
 
+# Ranges where we have most of the valid unicode names. We could be finer
+# grained, but is it worth it for performance? While unicode has characters in
+# the range 0, 0x110000, we seem to have names for about 10% of those (131808
+# as I write this). With the ranges below we cover them all, with a density of
+# ~67%; the biggest next gap we consider only adds about 1% density, and there
+# are 600 gaps that would need hard coding.
+_UNICODE_RANGES = [(32, 0x2fa1e), (0xe0001, 0xe01f0)]  # (start, stop) pairs, stop excluded
+
 # Public API
 __all__ = ['Completer','IPCompleter']
 
@@ -2092,7 +2100,7 @@ class IPCompleter(Completer):
                 full_text = line_buffer
             completions = self._jedi_matches(
                 cursor_pos, cursor_line, full_text)
-                
+
         if self.merge_completions:
             matches = []
             for matcher in self.matchers:
@@ -2195,6 +2203,16 @@ class IPCompleter(Completer):
                     names.append(unicodedata.name(chr(c)))
                 except ValueError:
                     pass
-            self._unicode_names = names
+            self._unicode_names = _unicode_name_compute(_UNICODE_RANGES)
 
         return self._unicode_names
+
+def _unicode_name_compute(ranges:List[Tuple[int,int]]) -> List[str]:  # names of the named code points in ``ranges``
+    names = []
+    for start,stop in ranges:  # each pair is half-open: start included, stop excluded
+        for c in range(start, stop) :
+            try:
+                names.append(unicodedata.name(chr(c)))
+            except ValueError:  # raised by unicodedata.name() when the code point has no name
+                pass
+    return names
diff --git a/IPython/core/tests/test_completer.py b/IPython/core/tests/test_completer.py
index feef38c..7c54518 100644
--- a/IPython/core/tests/test_completer.py
+++ b/IPython/core/tests/test_completer.py
@@ -33,6 +33,19 @@ from nose.tools import assert_in, assert_not_in
 # Test functions
 # -----------------------------------------------------------------------------
 
+def test_unicode_range():
+    """
+    Test that the ranges we test for unicode names give the same number of
+    results as testing the full length.
+    """
+    from IPython.core.completer import  _unicode_name_compute, _UNICODE_RANGES
+
+    expected_list = _unicode_name_compute([(0, 0x110000)])  # reference: scan the full 0..0x110000 span
+    test = _unicode_name_compute(_UNICODE_RANGES)
+
+    assert len(expected_list) == len(test)
+    assert len(expected_list) == 131808  # NOTE(review): presumably tied to the Unicode data version in use — confirm
+
 
 @contextmanager
 def greedy_completion():
@@ -230,7 +243,7 @@ class TestCompleter(unittest.TestCase):
         ip = get_ipython()
         text, matches = ip.Completer.latex_matches("\\really_i_should_match_nothing")
         nt.assert_equal(text, "")
-        nt.assert_equal(matches, [])
+        nt.assert_equal(matches, ())
 
     def test_back_latex_completion(self):
         ip = get_ipython()