upstream/ipython Files · tools/gen_latex_symbols.py

support for unicode identifiers...

support for unicode identifiers This rewrites some of the regular expressions that are used to match Python identifiers, so that they are unicode compatible. In Python 3, identifiers can contain unicode characters as long as the first character is not numeric. Examples for the changes: • inputtransformer: ``` In [1]: π = 3.14 In [2]: π.is_integer? Object `is_integer` not found. ``` ---------- • namespace: ``` π.is_integ*? ``` or ``` In [1]: %psearch π.is_integ Python identifiers can only contain ascii characters. ``` ---------- • prefilter: ``` %autocall 1 φ = float get_ipython().prefilter("φ 3") # should be 'φ(3)', but returns 'φ 3' ``` ---------- • completerlib: If there is a file e.g. named `π.py` in the current directory, then ``` import IPython IPython.core.completerlib.module_list('.') # should contain module 'π' ```

kousik - - Load All Authors

File last commit:

r25245:39b51818


                r25595:d9c0e690

Download file

             gen_latex_symbols.py
        
                    88 lines
            
             | 2.7 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / tools / gen_latex_symbols.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # coding: utf-8

      # This script autogenerates `IPython/core/latex_symbols.py`, which contains the

      # dict `latex_symbols` and its inverse, `reverse_latex_symbol`. The keys of

      # `latex_symbols` are latex symbols, such as `\\alpha`, and the values are the

      # unicode equivalents for those. Most importantly, only unicode symbols that

      # are valid identifiers in Python 3 are included.

      #

      # The original mapping of latex symbols to unicode comes from the `latex_symbols.jl` file from Julia.

      import os, sys

      # Import the Julia LaTeX symbols
      print('Importing latex_symbols.jl from Julia...')
      import requests
      url = 'https://raw.githubusercontent.com/JuliaLang/julia/master/stdlib/REPL/src/latex_symbols.jl'
      # The timeout keeps the script from hanging forever on a stalled connection.
      r = requests.get(url, timeout=30)
      # Stop on an HTTP error status; otherwise the body of an error page would
      # be parsed below and fail with a confusing "not in list" ValueError.
      r.raise_for_status()

      # Build a list of key, value pairs
      print('Building a list of (latex, unicode) key-value pairs...')
      lines = r.text.splitlines()

      # These two marker comments delimit the section that is read for the font
      # prefixes; `lines.index` raises ValueError if either one is missing.
      prefixes_line = lines.index('# "font" prefixes')
      symbols_line = lines.index('# manual additions:')

      # Map the 2nd whitespace-separated token of each line to the 4th one
      # (presumably lines shaped like `const NAME = "VALUE"` -- confirm against
      # the Julia file).
      prefix_dict = {}
      for raw_line in lines[prefixes_line + 1: symbols_line]:
          tokens = raw_line.split()
          # Lines with fewer than four tokens (blank lines, short comments) carry
          # no prefix definition; the `latex_symbols` line is skipped explicitly.
          if len(tokens) < 4 or tokens[1] == 'latex_symbols':
              continue
          prefix_dict[tokens[1]] = tokens[3]

      idents = []
      for raw_line in lines[symbols_line:]:
          # Drop end-of-line comments first, so that a `=>` that only occurs
          # inside a comment is not mistaken for a definition.
          entry = raw_line.split('#', 1)[0].strip()
          if '=>' not in entry:
              continue  # not a definition
          latex, char = entry.split('=>')
          if '*' in latex:
              # A prefix is present (PREFIX*"name"): substitute its value. The
              # prefix value loses its last character and the name its first one
              # (presumably the two adjoining quotes), which joins both into a
              # single quoted string.
              prefix, latex = latex.split('*')
              latex = prefix_dict[prefix][:-1] + latex[1:]
          # Keep only the text between the first pair of double quotes.
          latex, char = latex.split('"')[1], char.split('"')[1]
          idents.append((latex, char))

      # Filter out non-valid identifiers
      print('Filtering out characters that are not valid Python 3 identifiers')

      def test_ident(i):

          """Is the unicode string valid in a Python 3 identifier."""

          # Some characters are not valid at the start of a name, but we still want to

          # include them. So prefix with 'a', which is valid at the start.

          return ('a' + i).isidentifier()

      # Sanity-check the filter on one known-good and one known-bad character
      # before applying it to the whole list.
      assert test_ident("α")
      assert not test_ident('‴')

      valid_idents = [pair for pair in idents if test_ident(pair[1])]

      # Write the `latex_symbols.py` module to ../IPython/core, resolved against
      # the cwd (so the script presumably has to be started from `tools/`).
      #
      # The module text is assembled from single-line literals and joined once:
      # its content then cannot pick up indentation from this script, and the
      # source URL in the header is taken from `url` instead of being repeated.
      header_lines = [
          "# encoding: utf-8",
          "",
          "# DO NOT EDIT THIS FILE BY HAND.",
          "",
          "# To update this file, run the script /tools/gen_latex_symbols.py using Python 3",
          "",
          "# This file is autogenerated from the file:",
          "# %s" % url,
          "# This original list is filtered to remove any unicode characters that are not valid",
          "# Python identifiers.",
          "",
          "latex_symbols = {",
          "",
          "",
      ]
      parts = ["\n".join(header_lines)]
      # One dict entry per (latex, unicode) pair.
      parts.extend('    "%s" : "%s",\n' % (latex, char) for latex, char in valid_idents)
      parts.append("}\n")
      # The generated module also exposes the inverse mapping.
      parts.append("\n\nreverse_latex_symbol = { v:k for k,v in latex_symbols.items()}\n")
      s = "".join(parts)

      fn = os.path.join('..','IPython','core','latex_symbols.py')
      print("Writing the file: %s" % fn)
      with open(fn, 'w', encoding='utf-8') as f:
          f.write(s)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# coding: utf-8

				# This script autogenerates `IPython/core/latex_symbols.py`, which contains the
				# dict `latex_symbols` and its inverse, `reverse_latex_symbol`. The keys of
				# `latex_symbols` are latex symbols, such as `\\alpha`, and the values are the
				# unicode equivalents for those. Most importantly, only unicode symbols that
				# are valid identifiers in Python 3 are included.

				#
				# The original mapping of latex symbols to unicode comes from the `latex_symbols.jl` file from Julia.

				import os, sys

				# Import the Julia LaTeX symbols
				print('Importing latex_symbols.jl from Julia...')
				import requests
				url = 'https://raw.githubusercontent.com/JuliaLang/julia/master/stdlib/REPL/src/latex_symbols.jl'
				# The timeout keeps the script from hanging forever on a stalled connection.
				r = requests.get(url, timeout=30)
				# Stop on an HTTP error status; otherwise the body of an error page would
				# be parsed below and fail with a confusing "not in list" ValueError.
				r.raise_for_status()


				# Build a list of key, value pairs
				print('Building a list of (latex, unicode) key-value pairs...')
				lines = r.text.splitlines()

				# These two marker comments delimit the section that is read for the font
				# prefixes; `lines.index` raises ValueError if either one is missing.
				prefixes_line = lines.index('# "font" prefixes')
				symbols_line = lines.index('# manual additions:')

				# Map the 2nd whitespace-separated token of each line to the 4th one
				# (presumably lines shaped like `const NAME = "VALUE"` -- confirm against
				# the Julia file).
				prefix_dict = {}
				for raw_line in lines[prefixes_line + 1: symbols_line]:
					tokens = raw_line.split()
					# Lines with fewer than four tokens (blank lines, short comments) carry
					# no prefix definition; the `latex_symbols` line is skipped explicitly.
					if len(tokens) < 4 or tokens[1] == 'latex_symbols':
						continue
					prefix_dict[tokens[1]] = tokens[3]

				idents = []
				for raw_line in lines[symbols_line:]:
					# Drop end-of-line comments first, so that a `=>` that only occurs
					# inside a comment is not mistaken for a definition.
					entry = raw_line.split('#', 1)[0].strip()
					if '=>' not in entry:
						continue  # not a definition
					latex, char = entry.split('=>')
					if '*' in latex:
						# A prefix is present (PREFIX*"name"): substitute its value. The
						# prefix value loses its last character and the name its first one
						# (presumably the two adjoining quotes), which joins both into a
						# single quoted string.
						prefix, latex = latex.split('*')
						latex = prefix_dict[prefix][:-1] + latex[1:]
					# Keep only the text between the first pair of double quotes.
					latex, char = latex.split('"')[1], char.split('"')[1]
					idents.append((latex, char))

				# Filter out non-valid identifiers
				print('Filtering out characters that are not valid Python 3 identifiers')

				def test_ident(i):
				"""Is the unicode string valid in a Python 3 identifier."""
				# Some characters are not valid at the start of a name, but we still want to
				# include them. So prefix with 'a', which is valid at the start.
				return ('a' + i).isidentifier()

				# Sanity-check the filter on one known-good and one known-bad character
				# before applying it to the whole list.
				assert test_ident("α")
				assert not test_ident('‴')

				valid_idents = [pair for pair in idents if test_ident(pair[1])]

				# Write the `latex_symbols.py` module to ../IPython/core, resolved against
				# the cwd (so the script presumably has to be started from `tools/`).
				#
				# The module text is assembled from single-line literals and joined once:
				# its content then cannot pick up indentation from this script, and the
				# source URL in the header is taken from `url` instead of being repeated.
				header_lines = [
					"# encoding: utf-8",
					"",
					"# DO NOT EDIT THIS FILE BY HAND.",
					"",
					"# To update this file, run the script /tools/gen_latex_symbols.py using Python 3",
					"",
					"# This file is autogenerated from the file:",
					"# %s" % url,
					"# This original list is filtered to remove any unicode characters that are not valid",
					"# Python identifiers.",
					"",
					"latex_symbols = {",
					"",
					"",
				]
				parts = ["\n".join(header_lines)]
				# One dict entry per (latex, unicode) pair, indented by four spaces.
				parts.extend('    "%s" : "%s",\n' % (latex, char) for latex, char in valid_idents)
				parts.append("}\n")
				# The generated module also exposes the inverse mapping.
				parts.append("\n\nreverse_latex_symbol = { v:k for k,v in latex_symbols.items()}\n")
				s = "".join(parts)

				fn = os.path.join('..','IPython','core','latex_symbols.py')
				print("Writing the file: %s" % fn)
				with open(fn, 'w', encoding='utf-8') as f:
					f.write(s)