From d9c0e690a2015544bff683cd2403491a0fa974ba 2020-03-27 21:01:53
From: Markus Wageringel <markus.wageringel+gh@gmail.com>
Date: 2020-03-27 21:01:53
Subject: [PATCH] support for unicode identifiers

This rewrites some of the regular expressions that are used to match
Python identifiers, so that they are Unicode-compatible. In Python 3,
identifiers can contain non-ASCII letters and digits (see PEP 3131),
as long as the first character is not a digit.

Examples of the incorrect behavior before this change, by module:

• inputtransformer:

```
In [1]: π = 3.14
In [2]: π.is_integer?
Object `is_integer` not found.
```

----------
• namespace:

```
π.is_integ*?
```
or
```
In [1]: %psearch π.is_integ
Python identifiers can only contain ascii characters.
```

----------
• prefilter:

```
%autocall 1
φ = float
get_ipython().prefilter("φ 3")  # should be 'φ(3)', but returns 'φ 3'
```

----------
• completerlib:

If the current directory contains a file named, for example, `π.py`,
then the module list returned below should include it:

```
import IPython
IPython.core.completerlib.module_list('.')  # should contain module 'π'
```

---
diff --git a/IPython/core/completerlib.py b/IPython/core/completerlib.py
index 9e592b0..7860cb6 100644
--- a/IPython/core/completerlib.py
+++ b/IPython/core/completerlib.py
@@ -52,7 +52,7 @@ TIMEOUT_STORAGE = 2
 TIMEOUT_GIVEUP = 20
 
 # Regular expression for the python import statement
-import_re = re.compile(r'(?P<name>[a-zA-Z_][a-zA-Z0-9_]*?)'
+import_re = re.compile(r'(?P<name>[^\W\d]\w*?)'
                        r'(?P<package>[/\\]__init__)?'
                        r'(?P<suffix>%s)$' %
                        r'|'.join(re.escape(s) for s in _suffixes))
diff --git a/IPython/core/inputtransformer.py b/IPython/core/inputtransformer.py
index 1c35eb6..afeca93 100644
--- a/IPython/core/inputtransformer.py
+++ b/IPython/core/inputtransformer.py
@@ -278,8 +278,8 @@ def escaped_commands(line):
 _initial_space_re = re.compile(r'\s*')
 
 _help_end_re = re.compile(r"""(%{0,2}
-                              [a-zA-Z_*][\w*]*        # Variable name
-                              (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
+                              (?!\d)[\w*]+            # Variable name
+                              (\.(?!\d)[\w*]+)*       # .etc.etc
                               )
                               (\?\??)$                # ? or ??
                               """,
diff --git a/IPython/core/inputtransformer2.py b/IPython/core/inputtransformer2.py
index 4562fe0..0443e68 100644
--- a/IPython/core/inputtransformer2.py
+++ b/IPython/core/inputtransformer2.py
@@ -405,8 +405,8 @@ class EscapedCommand(TokenTransformBase):
         return lines_before + [new_line] + lines_after
 
 _help_end_re = re.compile(r"""(%{0,2}
-                              [a-zA-Z_*][\w*]*        # Variable name
-                              (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
+                              (?!\d)[\w*]+            # Variable name
+                              (\.(?!\d)[\w*]+)*       # .etc.etc
                               )
                               (\?\??)$                # ? or ??
                               """,
diff --git a/IPython/core/magics/namespace.py b/IPython/core/magics/namespace.py
index cef6ddb..acc4620 100644
--- a/IPython/core/magics/namespace.py
+++ b/IPython/core/magics/namespace.py
@@ -208,12 +208,6 @@ class NamespaceMagics(Magics):
         
           %psearch -l            list all available object types
         """
-        try:
-            parameter_s.encode('ascii')
-        except UnicodeEncodeError:
-            print('Python identifiers can only contain ascii characters.')
-            return
-
         # default namespaces to be searched
         def_search = ['user_local', 'user_global', 'builtin']
 
diff --git a/IPython/core/prefilter.py b/IPython/core/prefilter.py
index dbf185e..bf801f9 100644
--- a/IPython/core/prefilter.py
+++ b/IPython/core/prefilter.py
@@ -37,7 +37,7 @@ class PrefilterError(Exception):
 
 
 # RegExp to identify potential function names
-re_fun_name = re.compile(r'[a-zA-Z_]([a-zA-Z0-9_.]*) *$')
+re_fun_name = re.compile(r'[^\W\d]([\w.]*) *$')
 
 # RegExp to exclude strings with this start from autocalling.  In
 # particular, all binary operators should be excluded, so that if foo is
diff --git a/IPython/core/tests/test_inputtransformer.py b/IPython/core/tests/test_inputtransformer.py
index 90a1d5a..0d97fd4 100644
--- a/IPython/core/tests/test_inputtransformer.py
+++ b/IPython/core/tests/test_inputtransformer.py
@@ -113,6 +113,7 @@ syntax = \
         (u'%hist2??', "get_ipython().run_line_magic('pinfo2', '%hist2')"),
         (u'%%hist3?', "get_ipython().run_line_magic('pinfo', '%%hist3')"),
         (u'%%hist4??', "get_ipython().run_line_magic('pinfo2', '%%hist4')"),
+        (u'π.foo?', "get_ipython().run_line_magic('pinfo', 'π.foo')"),
         (u'f*?', "get_ipython().run_line_magic('psearch', 'f*')"),
         (u'ax.*aspe*?', "get_ipython().run_line_magic('psearch', 'ax.*aspe*')"),
         (u'a = abc?', "get_ipython().set_next_input('a = abc');"
diff --git a/IPython/core/tests/test_inputtransformer2.py b/IPython/core/tests/test_inputtransformer2.py
index cde9eca..b29a019 100644
--- a/IPython/core/tests/test_inputtransformer2.py
+++ b/IPython/core/tests/test_inputtransformer2.py
@@ -119,6 +119,11 @@ b) = zip?
 [r"get_ipython().set_next_input('(a,\nb) = zip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
 )
 
+HELP_UNICODE = (
+    ["π.foo?\n"], (1, 0),
+    ["get_ipython().run_line_magic('pinfo', 'π.foo')\n"]
+)
+
 
 def null_cleanup_transformer(lines):
     """
@@ -223,6 +228,9 @@ def test_transform_help():
     tf = ipt2.HelpEnd((1, 0), (2, 8))
     nt.assert_equal(tf.transform(HELP_MULTILINE[0]), HELP_MULTILINE[2])
 
+    tf = ipt2.HelpEnd((1, 0), (1, 0))
+    nt.assert_equal(tf.transform(HELP_UNICODE[0]), HELP_UNICODE[2])
+
 def test_find_assign_op_dedent():
     """
     be careful that empty token like dedent are not counted as parens
diff --git a/IPython/core/tests/test_magic.py b/IPython/core/tests/test_magic.py
index e5ad820..56feb82 100644
--- a/IPython/core/tests/test_magic.py
+++ b/IPython/core/tests/test_magic.py
@@ -602,6 +602,8 @@ def doctest_precision():
 def test_psearch():
     with tt.AssertPrints("dict.fromkeys"):
         _ip.run_cell("dict.fr*?")
+    with tt.AssertPrints("π.is_integer"):
+        _ip.run_cell("π = 3.14;\nπ.is_integ*?")
 
 def test_timeit_shlex():
     """test shlex issues with timeit (#1109)"""
diff --git a/IPython/core/tests/test_prefilter.py b/IPython/core/tests/test_prefilter.py
index 0e61b46..ca447b3 100644
--- a/IPython/core/tests/test_prefilter.py
+++ b/IPython/core/tests/test_prefilter.py
@@ -115,3 +115,13 @@ def test_prefilter_attribute_errors():
     finally:
         del ip.user_ns['x']
         ip.magic('autocall 0')
+
+
+def test_autocall_should_support_unicode():
+    ip.magic('autocall 2')
+    ip.user_ns['π'] = lambda x: x
+    try:
+        nt.assert_equal(ip.prefilter('π 3'),'π(3)')
+    finally:
+        ip.magic('autocall 0')
+        del ip.user_ns['π']