##// END OF EJS Templates
Refactor strip_email_quotes...
Blazej Michalik -
Show More
@@ -165,19 +165,6 b' def test_dollar_formatter():'
165 165 nt.assert_equal(s, "$HOME")
166 166
167 167
168 def test_long_substr():
169 data = ['hi']
170 nt.assert_equal(text.long_substr(data), 'hi')
171
172
173 def test_long_substr2():
174 data = ['abc', 'abd', 'abf', 'ab']
175 nt.assert_equal(text.long_substr(data), 'ab')
176
177 def test_long_substr_empty():
178 data = []
179 nt.assert_equal(text.long_substr(data), '')
180
181 168 def test_strip_email():
182 169 src = """\
183 170 >> >>> def f(x):
@@ -10,6 +10,7 b' Inheritance diagram:'
10 10
11 11 import os
12 12 import re
13 import string
13 14 import sys
14 15 import textwrap
15 16 from string import Formatter
@@ -405,22 +406,6 b' def wrap_paragraphs(text, ncols=80):'
405 406 return out_ps
406 407
407 408
408 def long_substr(data):
409 """Return the longest common substring in a list of strings.
410
411 Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
412 """
413 substr = ''
414 if len(data) > 1 and len(data[0]) > 0:
415 for i in range(len(data[0])):
416 for j in range(len(data[0])-i+1):
417 if j > len(substr) and all(data[0][i:i+j] in x for x in data):
418 substr = data[0][i:i+j]
419 elif len(data) == 1:
420 substr = data[0]
421 return substr
422
423
424 409 def strip_email_quotes(text):
425 410 """Strip leading email quotation characters ('>').
426 411
@@ -454,20 +439,23 b' def strip_email_quotes(text):'
454 439 Out[5]: '> > text\\n> > more\\nlast different'
455 440 """
456 441 lines = text.splitlines()
457 matches = set()
458 for line in lines:
459 prefix = re.match(r'^(\s*>[ >]*)', line)
460 if prefix:
461 matches.add(prefix.group(1))
462 else:
442 strip_len = 0
443
444 for characters in zip(*lines):
445 # Check if all characters in this position are the same
446 if len(set(characters)) > 1:
463 447 break
448 prefix_char = characters[0][0]
449
450 if prefix_char in string.whitespace or prefix_char == ">":
451 strip_len += 1
464 452 else:
465 prefix = long_substr(list(matches))
466 if prefix:
467 strip = len(prefix)
468 text = '\n'.join([ ln[strip:] for ln in lines])
453 break
454
455 text = "\n".join([ln[strip_len:] for ln in lines])
469 456 return text
470 457
458
471 459 def strip_ansi(source):
472 460 """
473 461 Remove ansi escape codes from text.
General Comments 0
You need to be logged in to leave comments. Log in now.