upstream/ipython Files · IPython/utils/tests/test_tokenutil.py

Fix , improve performance of auto match for quotes...

Fix , improve performance of auto match for quotes As pointed out in , auto matching of quotes may take a long time if the prefix is long. To be more precise, the longer the text before the first quote, the slower it is. This is all caused by the regex pattern used: `r'^([^"]+|"[^"]*")*$'`, which I suspect is O(2^N) slow. ```python In [1]: text = "function_with_long_nameeee('arg" In [2]: import re In [3]: pattern = re.compile(r"^([^']+|'[^']*')*$") In [4]: %timeit pattern.match(text) 10.3 s ± 67.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) In [5]: %timeit pattern.match("1'") 312 ns ± 0.775 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each) In [6]: %timeit pattern.match("12'") 462 ns ± 1.95 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each) In [7]: %timeit pattern.match("123'") 766 ns ± 6.32 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each) In [8]: %timeit pattern.match("1234'") 1.59 µs ± 20.9 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each) ``` But the pattern we want here can actually be detected with a Python implemention in O(N) time.

Nikita Kniazev - - Load All Authors

File last commit:

r27098:7410af58


                r27762:c179c2a5

Download file

             test_tokenutil.py
        
                    141 lines
            
             | 3.8 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / utils / tests / test_tokenutil.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      """Tests for tokenutil"""

      # Copyright (c) IPython Development Team.

      # Distributed under the terms of the Modified BSD License.

      import pytest

      from IPython.utils.tokenutil import token_at_cursor, line_at_cursor

      def expect_token(expected, cell, cursor_pos):

          token = token_at_cursor(cell, cursor_pos)

          offset = 0

          for line in cell.splitlines():

              if offset + len(line) >= cursor_pos:

                  break

              else:

                  offset += len(line)+1

          column = cursor_pos - offset

          line_with_cursor = "%s|%s" % (line[:column], line[column:])

          assert token == expected, "Expected %r, got %r in: %r (pos %i)" % (

              expected,

              token,

              line_with_cursor,

              cursor_pos,

          )

      def test_simple():

          cell = "foo"

          for i in range(len(cell)):

              expect_token("foo", cell, i)

      def test_function():

          cell = "foo(a=5, b='10')"

          expected = 'foo'

          # up to `foo(|a=`

          for i in range(cell.find('a=') + 1):

              expect_token("foo", cell, i)

          # find foo after `=`

          for i in [cell.find('=') + 1, cell.rfind('=') + 1]:

              expect_token("foo", cell, i)

          # in between `5,|` and `|b=`

          for i in range(cell.find(','), cell.find('b=')):

              expect_token("foo", cell, i)

      def test_multiline():

          cell = '\n'.join([

              'a = 5',

              'b = hello("string", there)'

          ])

          expected = 'hello'

          start = cell.index(expected) + 1

          for i in range(start, start + len(expected)):

              expect_token(expected, cell, i)

          expected = 'hello'

          start = cell.index(expected) + 1

          for i in range(start, start + len(expected)):

              expect_token(expected, cell, i)

      def test_multiline_token():

          cell = '\n'.join([

              '"""\n\nxxxxxxxxxx\n\n"""',

              '5, """',

              'docstring',

              'multiline token',

              '""", [',

              '2, 3, "complicated"]',

              'b = hello("string", there)'

          ])

          expected = 'hello'

          start = cell.index(expected) + 1

          for i in range(start, start + len(expected)):

              expect_token(expected, cell, i)

          expected = 'hello'

          start = cell.index(expected) + 1

          for i in range(start, start + len(expected)):

              expect_token(expected, cell, i)

      def test_nested_call():

          cell = "foo(bar(a=5), b=10)"

          expected = 'foo'

          start = cell.index('bar') + 1

          for i in range(start, start + 3):

              expect_token(expected, cell, i)

          expected = 'bar'

          start = cell.index('a=')

          for i in range(start, start + 3):

              expect_token(expected, cell, i)

          expected = 'foo'

          start = cell.index(')') + 1

          for i in range(start, len(cell)-1):

              expect_token(expected, cell, i)

      def test_attrs():

          cell = "a = obj.attr.subattr"

          expected = 'obj'

          idx = cell.find('obj') + 1

          for i in range(idx, idx + 3):

              expect_token(expected, cell, i)

          idx = cell.find('.attr') + 2

          expected = 'obj.attr'

          for i in range(idx, idx + 4):

              expect_token(expected, cell, i)

          idx = cell.find('.subattr') + 2

          expected = 'obj.attr.subattr'

          for i in range(idx, len(cell)):

              expect_token(expected, cell, i)

      def test_line_at_cursor():

          cell = ""

          (line, offset) = line_at_cursor(cell, cursor_pos=11)

          assert line == ""

          assert offset == 0

          # The position after a newline should be the start of the following line.

          cell = "One\nTwo\n"

          (line, offset) = line_at_cursor(cell, cursor_pos=4)

          assert line == "Two\n"

          assert offset == 4

          # The end of a cell should be on the last line

          cell = "pri\npri"

          (line, offset) = line_at_cursor(cell, cursor_pos=7)

          assert line == "pri"

          assert offset == 4

      @pytest.mark.parametrize(

          "c, token",

          zip(

              list(range(16, 22)) + list(range(22, 28)),

              ["int"] * (22 - 16) + ["map"] * (28 - 22),

          ),

      )

      def test_multiline_statement(c, token):

          cell = """a = (1,

          3)

      int()

      map()

      """

          expect_token(token, cell, c)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				"""Tests for tokenutil"""
				# Copyright (c) IPython Development Team.
				# Distributed under the terms of the Modified BSD License.

				import pytest

				from IPython.utils.tokenutil import token_at_cursor, line_at_cursor

				def expect_token(expected, cell, cursor_pos):
				token = token_at_cursor(cell, cursor_pos)
				offset = 0
				for line in cell.splitlines():
				if offset + len(line) >= cursor_pos:
				break
				else:
				offset += len(line)+1
				column = cursor_pos - offset
				line_with_cursor = "%s\|%s" % (line[:column], line[column:])
				assert token == expected, "Expected %r, got %r in: %r (pos %i)" % (
				expected,
				token,
				line_with_cursor,
				cursor_pos,
				)


				def test_simple():
				cell = "foo"
				for i in range(len(cell)):
				expect_token("foo", cell, i)

				def test_function():
				cell = "foo(a=5, b='10')"
				expected = 'foo'
				# up to `foo(\|a=`
				for i in range(cell.find('a=') + 1):
				expect_token("foo", cell, i)
				# find foo after `=`
				for i in [cell.find('=') + 1, cell.rfind('=') + 1]:
				expect_token("foo", cell, i)
				# in between `5,\|` and `\|b=`
				for i in range(cell.find(','), cell.find('b=')):
				expect_token("foo", cell, i)

				def test_multiline():
				cell = '\n'.join([
				'a = 5',
				'b = hello("string", there)'
				])
				expected = 'hello'
				start = cell.index(expected) + 1
				for i in range(start, start + len(expected)):
				expect_token(expected, cell, i)
				expected = 'hello'
				start = cell.index(expected) + 1
				for i in range(start, start + len(expected)):
				expect_token(expected, cell, i)

				def test_multiline_token():
				cell = '\n'.join([
				'"""\n\nxxxxxxxxxx\n\n"""',
				'5, """',
				'docstring',
				'multiline token',
				'""", [',
				'2, 3, "complicated"]',
				'b = hello("string", there)'
				])
				expected = 'hello'
				start = cell.index(expected) + 1
				for i in range(start, start + len(expected)):
				expect_token(expected, cell, i)
				expected = 'hello'
				start = cell.index(expected) + 1
				for i in range(start, start + len(expected)):
				expect_token(expected, cell, i)

				def test_nested_call():
				cell = "foo(bar(a=5), b=10)"
				expected = 'foo'
				start = cell.index('bar') + 1
				for i in range(start, start + 3):
				expect_token(expected, cell, i)
				expected = 'bar'
				start = cell.index('a=')
				for i in range(start, start + 3):
				expect_token(expected, cell, i)
				expected = 'foo'
				start = cell.index(')') + 1
				for i in range(start, len(cell)-1):
				expect_token(expected, cell, i)

				def test_attrs():
				cell = "a = obj.attr.subattr"
				expected = 'obj'
				idx = cell.find('obj') + 1
				for i in range(idx, idx + 3):
				expect_token(expected, cell, i)
				idx = cell.find('.attr') + 2
				expected = 'obj.attr'
				for i in range(idx, idx + 4):
				expect_token(expected, cell, i)
				idx = cell.find('.subattr') + 2
				expected = 'obj.attr.subattr'
				for i in range(idx, len(cell)):
				expect_token(expected, cell, i)

				def test_line_at_cursor():
				cell = ""
				(line, offset) = line_at_cursor(cell, cursor_pos=11)
				assert line == ""
				assert offset == 0

				# The position after a newline should be the start of the following line.
				cell = "One\nTwo\n"
				(line, offset) = line_at_cursor(cell, cursor_pos=4)
				assert line == "Two\n"
				assert offset == 4

				# The end of a cell should be on the last line
				cell = "pri\npri"
				(line, offset) = line_at_cursor(cell, cursor_pos=7)
				assert line == "pri"
				assert offset == 4


				@pytest.mark.parametrize(
				"c, token",
				zip(
				list(range(16, 22)) + list(range(22, 28)),
				["int"] * (22 - 16) + ["map"] * (28 - 22),
				),
				)
				def test_multiline_statement(c, token):
				cell = """a = (1,
				3)

				int()
				map()
				"""
				expect_token(token, cell, c)