From c20d7a0da79b0d784e717471534d4d716ec2ae4a 2021-06-30 15:20:34 From: Blazej Michalik Date: 2021-06-30 15:20:34 Subject: [PATCH] Refactor strip_email_quotes Also removes long_substr, which fixes the licensing issue mentioned in #13039. --- diff --git a/IPython/utils/tests/test_text.py b/IPython/utils/tests/test_text.py index 68474ae..0fb6430 100644 --- a/IPython/utils/tests/test_text.py +++ b/IPython/utils/tests/test_text.py @@ -165,19 +165,6 @@ def test_dollar_formatter(): nt.assert_equal(s, "$HOME") -def test_long_substr(): - data = ['hi'] - nt.assert_equal(text.long_substr(data), 'hi') - - -def test_long_substr2(): - data = ['abc', 'abd', 'abf', 'ab'] - nt.assert_equal(text.long_substr(data), 'ab') - -def test_long_substr_empty(): - data = [] - nt.assert_equal(text.long_substr(data), '') - def test_strip_email(): src = """\ >> >>> def f(x): diff --git a/IPython/utils/text.py b/IPython/utils/text.py index f2131ac..f54f97b 100644 --- a/IPython/utils/text.py +++ b/IPython/utils/text.py @@ -10,6 +10,7 @@ Inheritance diagram: import os import re +import string import sys import textwrap from string import Formatter @@ -405,22 +406,6 @@ def wrap_paragraphs(text, ncols=80): return out_ps -def long_substr(data): - """Return the longest common substring in a list of strings. - - Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python - """ - substr = '' - if len(data) > 1 and len(data[0]) > 0: - for i in range(len(data[0])): - for j in range(len(data[0])-i+1): - if j > len(substr) and all(data[0][i:i+j] in x for x in data): - substr = data[0][i:i+j] - elif len(data) == 1: - substr = data[0] - return substr - - def strip_email_quotes(text): """Strip leading email quotation characters ('>'). @@ -447,27 +432,30 @@ def strip_email_quotes(text): In [4]: strip_email_quotes('> > text\\n> > more\\n> more...') Out[4]: '> text\\n> more\\nmore...' 
- So if any line has no quote marks ('>') , then none are stripped from any + So if any line has no quote marks ('>'), then none are stripped from any of them :: - + In [5]: strip_email_quotes('> > text\\n> > more\\nlast different') Out[5]: '> > text\\n> > more\\nlast different' """ lines = text.splitlines() - matches = set() - for line in lines: - prefix = re.match(r'^(\s*>[ >]*)', line) - if prefix: - matches.add(prefix.group(1)) + strip_len = 0 + + for characters in zip(*lines): + # Check if all characters in this position are the same + if len(set(characters)) > 1: + break + prefix_char = characters[0][0] + + if prefix_char in string.whitespace or prefix_char == ">": + strip_len += 1 else: break - else: - prefix = long_substr(list(matches)) - if prefix: - strip = len(prefix) - text = '\n'.join([ ln[strip:] for ln in lines]) + + text = "\n".join([ln[strip_len:] for ln in lines]) return text + def strip_ansi(source): """ Remove ansi escape codes from text.