upstream/ipython Files · IPython/utils/tests/test_openpy.py

Fix , improve performance of auto match for quotes...

Fix , improve performance of auto match for quotes

As pointed out in , auto matching of quotes may take a long time if the prefix is long. To be more precise, the longer the text before the first quote, the slower it is. This is all caused by the regex pattern used: `r'^([^"]+|"[^"]*")*$'`, which I suspect is O(2^N) slow.

```python
In [1]: text = "function_with_long_nameeee('arg"
In [2]: import re
In [3]: pattern = re.compile(r"^([^']+|'[^']*')*$")
In [4]: %timeit pattern.match(text)
10.3 s ± 67.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [5]: %timeit pattern.match("1'")
312 ns ± 0.775 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
In [6]: %timeit pattern.match("12'")
462 ns ± 1.95 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
In [7]: %timeit pattern.match("123'")
766 ns ± 6.32 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
In [8]: %timeit pattern.match("1234'")
1.59 µs ± 20.9 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
```

But the pattern we want here can actually be detected with a Python implementation in O(N) time.

Samuel Gaist - - Load All Authors

File last commit:

r26922:9c38a13d


                r27762:c179c2a5

Download file

             test_openpy.py
        
                    38 lines
            
             | 1.2 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / utils / tests / test_openpy.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      import io

      import os.path

      from IPython.utils import openpy

      # Directory that contains this test module.
      mydir = os.path.dirname(__file__)

      # Fixture file shared by the tests in this module; their assertions expect
      # it to be detected as iso-8859-5 and to contain a "coding: iso-8859-5" cookie.
      nonascii_path = os.path.join(mydir, "../../core/tests/nonascii.py")

      def test_detect_encoding():
          """The fixture file's encoding must be detected as iso-8859-5."""
          with open(nonascii_path, "rb") as fixture:
              # detect_encoding is handed the readline callable and returns a pair;
              # only the encoding name is checked here.
              detected, _consumed_lines = openpy.detect_encoding(fixture.readline)

          assert detected == "iso-8859-5"

      def test_read_file():
          """read_py_file must match an explicit iso-8859-5 read and honour the cookie flag."""
          cookie = "coding: iso-8859-5"

          # Reference text: the fixture decoded with the encoding given explicitly.
          with io.open(nonascii_path, encoding="iso-8859-5") as fixture:
              expected_text = fixture.read()

          # With the cookie kept, the result equals the reference text.
          with_cookie = openpy.read_py_file(nonascii_path, skip_encoding_cookie=False)
          assert with_cookie == expected_text
          assert cookie in with_cookie

          # With the cookie skipped, the declaration no longer appears.
          without_cookie = openpy.read_py_file(nonascii_path, skip_encoding_cookie=True)
          assert cookie not in without_cookie

      def test_source_to_unicode():
          """source_to_unicode must decode the fixture bytes and honour the cookie flag."""
          with io.open(nonascii_path, "rb") as fixture:
              raw = fixture.read()

          # Compare line by line against a manual iso-8859-5 decode of the same bytes.
          converted_lines = openpy.source_to_unicode(
              raw, skip_encoding_cookie=False
          ).splitlines()
          reference_lines = raw.decode("iso-8859-5").splitlines()
          assert converted_lines == reference_lines

          # With the cookie skipped, the declaration no longer appears.
          stripped = openpy.source_to_unicode(raw, skip_encoding_cookie=True)
          assert "coding: iso-8859-5" not in stripped

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				import io
				import os.path

				from IPython.utils import openpy

				# Directory that contains this test module.
				mydir = os.path.dirname(__file__)
				# Fixture file shared by the tests in this module; their assertions expect
				# it to be detected as iso-8859-5 and to contain a "coding: iso-8859-5" cookie.
				nonascii_path = os.path.join(mydir, "../../core/tests/nonascii.py")


				def test_detect_encoding():
					"""The fixture file's encoding must be detected as iso-8859-5."""
					with open(nonascii_path, "rb") as fixture:
						# detect_encoding is handed the readline callable and returns a pair;
						# only the encoding name is checked here.
						detected, _consumed_lines = openpy.detect_encoding(fixture.readline)

					assert detected == "iso-8859-5"


				def test_read_file():
					"""read_py_file must match an explicit iso-8859-5 read and honour the cookie flag."""
					cookie = "coding: iso-8859-5"

					# Reference text: the fixture decoded with the encoding given explicitly.
					with io.open(nonascii_path, encoding="iso-8859-5") as fixture:
						expected_text = fixture.read()

					# With the cookie kept, the result equals the reference text.
					with_cookie = openpy.read_py_file(nonascii_path, skip_encoding_cookie=False)
					assert with_cookie == expected_text
					assert cookie in with_cookie

					# With the cookie skipped, the declaration no longer appears.
					without_cookie = openpy.read_py_file(nonascii_path, skip_encoding_cookie=True)
					assert cookie not in without_cookie


				def test_source_to_unicode():
					"""source_to_unicode must decode the fixture bytes and honour the cookie flag."""
					with io.open(nonascii_path, "rb") as fixture:
						raw = fixture.read()

					# Compare line by line against a manual iso-8859-5 decode of the same bytes.
					converted_lines = openpy.source_to_unicode(
						raw, skip_encoding_cookie=False
					).splitlines()
					reference_lines = raw.decode("iso-8859-5").splitlines()
					assert converted_lines == reference_lines

					# With the cookie skipped, the declaration no longer appears.
					stripped = openpy.source_to_unicode(raw, skip_encoding_cookie=True)
					assert "coding: iso-8859-5" not in stripped