From 9ab89bf0b00d0371d4b3a023fbebd12e50e3d5fa 2020-05-18 23:04:26 From: Matthias Bussonnier Date: 2020-05-18 23:04:26 Subject: [PATCH] Update unicode ranges to fix test on nightly. The number of valid unicode symbols on nightly has increased, so update the ranges we skip. --- diff --git a/IPython/core/completer.py b/IPython/core/completer.py index ee165c5..dd09add 100644 --- a/IPython/core/completer.py +++ b/IPython/core/completer.py @@ -162,7 +162,7 @@ except ImportError: # write this). With below range we cover them all, with a density of ~67% # biggest next gap we consider only adds up about 1% density and there are 600 # gaps that would need hard coding. -_UNICODE_RANGES = [(32, 0x2fa1e), (0xe0001, 0xe01f0)] +_UNICODE_RANGES = [(32, 0x3134b), (0xe0001, 0xe01f0)] # Public API __all__ = ['Completer','IPCompleter'] diff --git a/IPython/core/tests/test_completer.py b/IPython/core/tests/test_completer.py index 413fc4f..80f479f 100644 --- a/IPython/core/tests/test_completer.py +++ b/IPython/core/tests/test_completer.py @@ -33,6 +33,40 @@ from nose.tools import assert_in, assert_not_in # Test functions # ----------------------------------------------------------------------------- +def recompute_unicode_ranges(): + """ + utility to recompute the largest unicode range without any characters + + use to recompute the gap in the global _UNICODE_RANGES of completer.py + """ + import itertools + import unicodedata + valid = [] + for c in range(0,0x10FFFF + 1): + try: + unicodedata.name(chr(c)) + except ValueError: + continue + valid.append(c) + + def ranges(i): + for a, b in itertools.groupby(enumerate(i), lambda pair: pair[1] - pair[0]): + b = list(b) + yield b[0][1], b[-1][1] + + rg = list(ranges(valid)) + lens = [] + gap_lens = [] + pstart, pstop = 0,0 + for start, stop in rg: + lens.append(stop-start) + gap_lens.append((start - pstop, hex(pstop), hex(start), f'{round((start - pstop)/0xe01f0*100)}%')) + pstart, pstop = start, stop + + return sorted(gap_lens)[-1] + + + def test_unicode_range(): """ Test that the ranges we test for unicode names give the same number of @@ -47,10 +81,24 @@ def test_unicode_range(): # do not inline the len() or on error pytest will try to print the 130 000 + # elements. - assert len_exp == len_test + message = None + if len_exp != len_test or len_exp > 131808: + size, start, stop, prct = recompute_unicode_ranges() + message = f"""_UNICODE_RANGES likely wrong and need updating. This is + likely due to a new release of Python. We've find that the biggest gap + in unicode characters has reduces in size to be {size} charaters + ({prct}), from {start}, to {stop}. In completer.py likely update to + + _UNICODE_RANGES = [(32, {start}), ({stop}, 0xe01f0)] + + And update the assertion below to use + + len_exp <= {len_exp} + """ + assert len_exp == len_test, message # fail if new unicode symbols have been added. - assert len_exp <= 131808 + assert len_exp <= 137714, message @contextmanager