upstream/ipython Files · IPython/utils/tests/test_text.py

Fix , improve performance of auto match for quotes...

Fix , improve performance of auto match for quotes

As pointed out in , auto matching of quotes may take a long time if the prefix is long. To be more precise, the longer the text before the first quote, the slower it is. This is all caused by the regex pattern used: `r'^([^"]+|"[^"]*")*$'`, which I suspect is O(2^N) slow.

```python
In [1]: text = "function_with_long_nameeee('arg"
In [2]: import re
In [3]: pattern = re.compile(r"^([^']+|'[^']*')*$")
In [4]: %timeit pattern.match(text)
10.3 s ± 67.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [5]: %timeit pattern.match("1'")
312 ns ± 0.775 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
In [6]: %timeit pattern.match("12'")
462 ns ± 1.95 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
In [7]: %timeit pattern.match("123'")
766 ns ± 6.32 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
In [8]: %timeit pattern.match("1234'")
1.59 µs ± 20.9 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
```

But the pattern we want here can actually be detected with a Python implementation in O(N) time.

Nate Rush - - Load All Authors

File last commit:

r27423:bf6638e6


                r27762:c179c2a5

Download file

             test_text.py
        
                    208 lines
            
             | 7.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / utils / tests / test_text.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # encoding: utf-8

      """Tests for IPython.utils.text"""

      #-----------------------------------------------------------------------------

      #  Copyright (C) 2011  The IPython Development Team

      #

      #  Distributed under the terms of the BSD License.  The full license is in

      #  the file COPYING, distributed as part of this software.

      #-----------------------------------------------------------------------------

      #-----------------------------------------------------------------------------

      # Imports

      #-----------------------------------------------------------------------------

      import os

      import math

      import random

      import sys

      from pathlib import Path

      import pytest

      from IPython.utils import text

      #-----------------------------------------------------------------------------

      # Globals

      #-----------------------------------------------------------------------------

      def test_columnize():
          """Check columnize output for column-major, row-major and spread layouts."""
          items = [letter * 5 for letter in "abcd"]

          one_row = "aaaaa  bbbbb  ccccc  ddddd\n"
          one_column = "aaaaa\nbbbbb\nccccc\nddddd\n"
          column_major = "aaaaa  ccccc\nbbbbb  ddddd\n"
          row_major = "aaaaa  bbbbb\nccccc  ddddd\n"

          # Each entry: (keyword arguments passed to columnize, expected text).
          cases = [
              ({"displaywidth": 80}, one_row),
              ({"displaywidth": 25}, column_major),
              ({"displaywidth": 12}, column_major),
              ({"displaywidth": 10}, one_column),
              ({"row_first": True, "displaywidth": 80}, one_row),
              ({"row_first": True, "displaywidth": 25}, row_major),
              ({"row_first": True, "displaywidth": 12}, row_major),
              ({"row_first": True, "displaywidth": 10}, one_column),
              (
                  {"displaywidth": 40, "spread": True},
                  "aaaaa      bbbbb      ccccc      ddddd\n",
              ),
              (
                  {"displaywidth": 20, "spread": True},
                  "aaaaa          ccccc\nbbbbb          ddddd\n",
              ),
              ({"displaywidth": 12, "spread": True}, column_major),
              ({"displaywidth": 10, "spread": True}, one_column),
          ]
          for kwargs, expected in cases:
              out = text.columnize(items, **kwargs)
              assert out == expected

      def test_columnize_random():
          """Fuzz columnize with random item lengths and display widths.

          For each layout order, 18 random item counts are drawn.  Every round
          picks a random display width, builds items no longer than that width
          and asserts that no rendered line is longer than the display width.
          """
          for row_first in (True, False):
              for nitems in [random.randint(2, 70) for _ in range(2, 20)]:
                  displaywidth = random.randint(20, 200)
                  rand_len = [random.randint(2, displaywidth) for _ in range(nitems)]
                  items = ["x" * length for length in rand_len]
                  out = text.columnize(
                      items, row_first=row_first, displaywidth=displaywidth
                  )
                  longer_line = max(len(line) for line in out.split("\n"))
                  longer_element = max(rand_len)
                  # The message carries every random input so a failure can be replayed.
                  assert longer_line <= displaywidth, (
                      f"Columnize displayed something larger than displaywidth : {longer_line}\n"
                      f"longer element : {longer_element}\n"
                      f"displaywidth : {displaywidth}\n"
                      f"number of element : {nitems}\n"
                      f"size of each element : {rand_len}\n"
                      f"row_first={row_first}\n"
                  )

      @pytest.mark.parametrize("row_first", [True, False])
      def test_columnize_medium(row_first):
          """Three 40-character items at displaywidth=80 come out one per line."""
          width = 40
          items = [char * width for char in "abc"]
          expected = "".join(item + "\n" for item in items)
          rendered = text.columnize(items, row_first=row_first, displaywidth=80)
          assert rendered == expected, "row_first={0}".format(row_first)

      @pytest.mark.parametrize("row_first", [True, False])
      def test_columnize_long(row_first):
          """Items one character wider than the display still come out one per line."""
          width = 11
          items = [char * width for char in "abc"]
          expected = "".join(item + "\n" for item in items)
          rendered = text.columnize(items, row_first=row_first, displaywidth=width - 1)
          assert rendered == expected, "row_first={0}".format(row_first)

      def eval_formatter_check(f):
          """Run the expression-formatting checks shared by the formatter tests.

          ``f`` is a formatter object; its ``format`` method is called with
          template strings and a keyword namespace.
          """
          ns = {
              "n": 12,
              "pi": math.pi,
              "stuff": "hello there",
              "os": os,
              "u": u"café",
              "b": "café",
          }
          expectations = [
              ("{n} {n//4} {stuff.split()[0]}", "12 3 hello"),
              (" ".join("{n//%i}" % d for d in range(1, 8)), "12 6 4 3 2 2 1"),
              ("{[n//i for i in range(1,8)]}", "[12, 6, 4, 3, 2, 2, 1]"),
              ("{stuff!s}", ns["stuff"]),
              ("{stuff!r}", repr(ns["stuff"])),
              # Check with unicode:
              ("{u}", ns["u"]),
          ]
          for template, expected in expectations:
              assert f.format(template, **ns) == expected

          # Only required not to raise; the result is not inspected.
          f.format("{b}", **ns)

          # An undefined name inside the field must surface as NameError.
          with pytest.raises(NameError):
              f.format("{dne}", **ns)

      def eval_formatter_slicing_check(f):
          """Check a formatter that evaluates slice syntax inside replacement fields.

          ``f`` is a formatter object; ``"{n:x}"`` is expected to raise
          ``SyntaxError`` when formatted with it.
          """
          ns = {"n": 12, "pi": math.pi, "stuff": "hello there", "os": os}
          slicing_cases = (
              (" {stuff.split()[:]} ", " ['hello', 'there'] "),
              (" {stuff.split()[::-1]} ", " ['there', 'hello'] "),
              ("{stuff[::2]}", ns["stuff"][::2]),
          )
          for template, expected in slicing_cases:
              assert f.format(template, **ns) == expected

          with pytest.raises(SyntaxError):
              f.format("{n:x}", **ns)

      def eval_formatter_no_slicing_check(f):
          """Check a formatter that honours format specs after the colon.

          ``f`` is a formatter object; ``"{n:x}"`` must format ``n`` as hex
          rather than being evaluated as part of the expression.
          """
          ns = {"n": 12, "pi": math.pi, "stuff": "hello there", "os": os}

          assert f.format("{n:x} {pi**2:+f}", **ns) == "c +9.869604"
          assert f.format("{stuff[slice(1,4)]}", **ns) == "ell"
          assert f.format("{a[:]}", a=[1, 2]) == "[1, 2]"

      def test_eval_formatter():
          """Run the shared and the no-slicing checks against EvalFormatter."""
          formatter = text.EvalFormatter()
          for check in (eval_formatter_check, eval_formatter_no_slicing_check):
              check(formatter)

      def test_full_eval_formatter():
          """Run the shared and the slicing checks against FullEvalFormatter."""
          formatter = text.FullEvalFormatter()
          for check in (eval_formatter_check, eval_formatter_slicing_check):
              check(formatter)

      def test_dollar_formatter():
          """Run the shared checks plus the ``$name`` cases on DollarFormatter."""
          formatter = text.DollarFormatter()
          eval_formatter_check(formatter)
          eval_formatter_slicing_check(formatter)

          ns = {"n": 12, "pi": math.pi, "stuff": "hello there", "os": os}
          dollar_cases = (
              ("$n", "12"),
              ("$n.real", "12"),
              ("$n/{stuff[:5]}", "12/hello"),
              ("$n $$HOME", "12 $HOME"),
          )
          for template, expected in dollar_cases:
              assert formatter.format(template, **ns) == expected

          # This case uses its own namespace instead of ``ns``.
          assert formatter.format("${foo}", foo="HOME") == "$HOME"

      def test_strip_email():
          """strip_email_quotes removes the common quote prefix from every line.

          The input and expected text are built line by line, so their exact
          whitespace does not depend on how this file is indented or spaced.
          """
          src = "\n".join(
              [
                  "        >> >>> def f(x):",
                  "        >> ...   return x+1",
                  "        >> ... ",
                  "        >> >>> zz = f(2.5)",
              ]
          )
          cln = "\n".join(
              [
                  ">>> def f(x):",
                  "...   return x+1",
                  "... ",
                  ">>> zz = f(2.5)",
              ]
          )
          assert text.strip_email_quotes(src) == cln

      def test_strip_email2():
          """A single line with spaced-out ``>`` markers is reduced to its content."""
          quoted = "> > > list()"
          assert text.strip_email_quotes(quoted) == "list()"

      def test_LSString():
          """Check the ``.l``, ``.s`` and ``.p`` views of LSString."""
          two_lines = text.LSString("abc\ndef")
          assert two_lines.l == ["abc", "def"]
          assert two_lines.s == "abc def"

          cwd = text.LSString(os.getcwd())
          first_path = cwd.p[0]
          assert isinstance(first_path, Path)

      def test_SList():
          """Check the ``.n``/``.s`` views and grep, fields and sort on SList."""
          SList = text.SList
          sl = SList(["a 11", "b 1", "a 2"])

          assert sl.n == "a 11\nb 1\na 2"
          assert sl.s == "a 11 b 1 a 2"

          starts_with_a = sl.grep(lambda line: line.startswith("a"))
          assert starts_with_a == SList(["a 11", "a 2"])

          first_field = sl.fields(0)
          assert first_field == SList(["a", "b", "a"])

          by_number = sl.sort(field=1, nums=True)
          assert by_number == SList(["b 1", "a 2", "a 11"])

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# encoding: utf-8
				"""Tests for IPython.utils.text"""

				#-----------------------------------------------------------------------------
				# Copyright (C) 2011 The IPython Development Team
				#
				# Distributed under the terms of the BSD License. The full license is in
				# the file COPYING, distributed as part of this software.
				#-----------------------------------------------------------------------------

				#-----------------------------------------------------------------------------
				# Imports
				#-----------------------------------------------------------------------------

				import os
				import math
				import random
				import sys

				from pathlib import Path

				import pytest

				from IPython.utils import text

				#-----------------------------------------------------------------------------
				# Globals
				#-----------------------------------------------------------------------------

				def test_columnize():
				    """Check columnize output for column-major, row-major and spread layouts.

				    Columns are separated by two spaces in the default layout; with
				    ``spread=True`` the gaps widen to fill the display width.
				    """
				    size = 5
				    items = [l * size for l in "abcd"]

				    out = text.columnize(items, displaywidth=80)
				    assert out == "aaaaa  bbbbb  ccccc  ddddd\n"
				    out = text.columnize(items, displaywidth=25)
				    assert out == "aaaaa  ccccc\nbbbbb  ddddd\n"
				    out = text.columnize(items, displaywidth=12)
				    assert out == "aaaaa  ccccc\nbbbbb  ddddd\n"
				    out = text.columnize(items, displaywidth=10)
				    assert out == "aaaaa\nbbbbb\nccccc\nddddd\n"

				    out = text.columnize(items, row_first=True, displaywidth=80)
				    assert out == "aaaaa  bbbbb  ccccc  ddddd\n"
				    out = text.columnize(items, row_first=True, displaywidth=25)
				    assert out == "aaaaa  bbbbb\nccccc  ddddd\n"
				    out = text.columnize(items, row_first=True, displaywidth=12)
				    assert out == "aaaaa  bbbbb\nccccc  ddddd\n"
				    out = text.columnize(items, row_first=True, displaywidth=10)
				    assert out == "aaaaa\nbbbbb\nccccc\nddddd\n"

				    out = text.columnize(items, displaywidth=40, spread=True)
				    assert out == "aaaaa      bbbbb      ccccc      ddddd\n"
				    out = text.columnize(items, displaywidth=20, spread=True)
				    assert out == "aaaaa          ccccc\nbbbbb          ddddd\n"
				    out = text.columnize(items, displaywidth=12, spread=True)
				    assert out == "aaaaa  ccccc\nbbbbb  ddddd\n"
				    out = text.columnize(items, displaywidth=10, spread=True)
				    assert out == "aaaaa\nbbbbb\nccccc\nddddd\n"


				def test_columnize_random():
				    """Fuzz columnize with random item lengths and display widths.

				    For each layout order, 18 random item counts are drawn.  Every round
				    picks a random display width, builds items no longer than that width
				    and asserts that no rendered line is longer than the display width.
				    """
				    for row_first in (True, False):
				        for nitems in [random.randint(2, 70) for _ in range(2, 20)]:
				            displaywidth = random.randint(20, 200)
				            rand_len = [random.randint(2, displaywidth) for _ in range(nitems)]
				            items = ["x" * length for length in rand_len]
				            out = text.columnize(
				                items, row_first=row_first, displaywidth=displaywidth
				            )
				            longer_line = max(len(line) for line in out.split("\n"))
				            longer_element = max(rand_len)
				            # The message carries every random input so a failure can be replayed.
				            assert longer_line <= displaywidth, (
				                f"Columnize displayed something larger than displaywidth : {longer_line}\n"
				                f"longer element : {longer_element}\n"
				                f"displaywidth : {displaywidth}\n"
				                f"number of element : {nitems}\n"
				                f"size of each element : {rand_len}\n"
				                f"row_first={row_first}\n"
				            )


				@pytest.mark.parametrize("row_first", [True, False])
				def test_columnize_medium(row_first):
				    """Three 40-character items at displaywidth=80 come out one per line."""
				    width = 40
				    items = [char * width for char in "abc"]
				    expected = "".join(item + "\n" for item in items)
				    rendered = text.columnize(items, row_first=row_first, displaywidth=80)
				    assert rendered == expected, "row_first={0}".format(row_first)


				@pytest.mark.parametrize("row_first", [True, False])
				def test_columnize_long(row_first):
				    """Items one character wider than the display still come out one per line."""
				    width = 11
				    items = [char * width for char in "abc"]
				    expected = "".join(item + "\n" for item in items)
				    rendered = text.columnize(items, row_first=row_first, displaywidth=width - 1)
				    assert rendered == expected, "row_first={0}".format(row_first)


				def eval_formatter_check(f):
				    """Run the expression-formatting checks shared by the formatter tests.

				    ``f`` is a formatter object; its ``format`` method is called with
				    template strings and a keyword namespace.
				    """
				    ns = {
				        "n": 12,
				        "pi": math.pi,
				        "stuff": "hello there",
				        "os": os,
				        "u": u"café",
				        "b": "café",
				    }
				    expectations = [
				        ("{n} {n//4} {stuff.split()[0]}", "12 3 hello"),
				        (" ".join("{n//%i}" % d for d in range(1, 8)), "12 6 4 3 2 2 1"),
				        ("{[n//i for i in range(1,8)]}", "[12, 6, 4, 3, 2, 2, 1]"),
				        ("{stuff!s}", ns["stuff"]),
				        ("{stuff!r}", repr(ns["stuff"])),
				        # Check with unicode:
				        ("{u}", ns["u"]),
				    ]
				    for template, expected in expectations:
				        assert f.format(template, **ns) == expected

				    # Only required not to raise; the result is not inspected.
				    f.format("{b}", **ns)

				    # An undefined name inside the field must surface as NameError.
				    with pytest.raises(NameError):
				        f.format("{dne}", **ns)


				def eval_formatter_slicing_check(f):
				    """Check a formatter that evaluates slice syntax inside replacement fields.

				    ``f`` is a formatter object; ``"{n:x}"`` is expected to raise
				    ``SyntaxError`` when formatted with it.
				    """
				    ns = {"n": 12, "pi": math.pi, "stuff": "hello there", "os": os}
				    slicing_cases = (
				        (" {stuff.split()[:]} ", " ['hello', 'there'] "),
				        (" {stuff.split()[::-1]} ", " ['there', 'hello'] "),
				        ("{stuff[::2]}", ns["stuff"][::2]),
				    )
				    for template, expected in slicing_cases:
				        assert f.format(template, **ns) == expected

				    with pytest.raises(SyntaxError):
				        f.format("{n:x}", **ns)

				def eval_formatter_no_slicing_check(f):
				    """Check a formatter that honours format specs after the colon.

				    ``f`` is a formatter object; ``"{n:x}"`` must format ``n`` as hex
				    rather than being evaluated as part of the expression.
				    """
				    ns = dict(n=12, pi=math.pi, stuff="hello there", os=os)

				    # The field squares ``pi`` (9.869604 is pi**2) and the namespace is
				    # passed as keywords, like every other call in this helper.
				    s = f.format("{n:x} {pi**2:+f}", **ns)
				    assert s == "c +9.869604"

				    s = f.format("{stuff[slice(1,4)]}", **ns)
				    assert s == "ell"

				    s = f.format("{a[:]}", a=[1, 2])
				    assert s == "[1, 2]"

				def test_eval_formatter():
				    """Run the shared and the no-slicing checks against EvalFormatter."""
				    formatter = text.EvalFormatter()
				    for check in (eval_formatter_check, eval_formatter_no_slicing_check):
				        check(formatter)

				def test_full_eval_formatter():
				    """Run the shared and the slicing checks against FullEvalFormatter."""
				    formatter = text.FullEvalFormatter()
				    for check in (eval_formatter_check, eval_formatter_slicing_check):
				        check(formatter)

				def test_dollar_formatter():
				    """Run the shared checks plus the ``$name`` cases on DollarFormatter."""
				    formatter = text.DollarFormatter()
				    eval_formatter_check(formatter)
				    eval_formatter_slicing_check(formatter)

				    ns = {"n": 12, "pi": math.pi, "stuff": "hello there", "os": os}
				    dollar_cases = (
				        ("$n", "12"),
				        ("$n.real", "12"),
				        ("$n/{stuff[:5]}", "12/hello"),
				        ("$n $$HOME", "12 $HOME"),
				    )
				    for template, expected in dollar_cases:
				        assert formatter.format(template, **ns) == expected

				    # This case uses its own namespace instead of ``ns``.
				    assert formatter.format("${foo}", foo="HOME") == "$HOME"


				def test_strip_email():
				    """strip_email_quotes removes the common quote prefix from every line.

				    The input and expected text are built line by line, so their exact
				    whitespace does not depend on how this file is indented or spaced.
				    """
				    src = "\n".join(
				        [
				            "        >> >>> def f(x):",
				            "        >> ...   return x+1",
				            "        >> ... ",
				            "        >> >>> zz = f(2.5)",
				        ]
				    )
				    cln = "\n".join(
				        [
				            ">>> def f(x):",
				            "...   return x+1",
				            "... ",
				            ">>> zz = f(2.5)",
				        ]
				    )
				    assert text.strip_email_quotes(src) == cln


				def test_strip_email2():
				    """A single line with spaced-out ``>`` markers is reduced to its content."""
				    quoted = "> > > list()"
				    assert text.strip_email_quotes(quoted) == "list()"


				def test_LSString():
				    """Check the ``.l``, ``.s`` and ``.p`` views of LSString."""
				    two_lines = text.LSString("abc\ndef")
				    assert two_lines.l == ["abc", "def"]
				    assert two_lines.s == "abc def"

				    cwd = text.LSString(os.getcwd())
				    first_path = cwd.p[0]
				    assert isinstance(first_path, Path)


				def test_SList():
				    """Check the ``.n``/``.s`` views and grep, fields and sort on SList."""
				    SList = text.SList
				    sl = SList(["a 11", "b 1", "a 2"])

				    assert sl.n == "a 11\nb 1\na 2"
				    assert sl.s == "a 11 b 1 a 2"

				    starts_with_a = sl.grep(lambda line: line.startswith("a"))
				    assert starts_with_a == SList(["a 11", "a 2"])

				    first_field = sl.fields(0)
				    assert first_field == SList(["a", "b", "a"])

				    by_number = sl.sort(field=1, nums=True)
				    assert by_number == SList(["b 1", "a 2", "a 11"])