##// END OF EJS Templates
Deprecation and removal for 8.17...
Matthias Bussonnier -
Show More
@@ -1,787 +1,793 b''
1 """DEPRECATED: Input handling and transformation machinery.
1 """DEPRECATED: Input handling and transformation machinery.
2
2
3 This module was deprecated in IPython 7.0, in favour of inputtransformer2.
3 This module was deprecated in IPython 7.0, in favour of inputtransformer2.
4
4
5 The first class in this module, :class:`InputSplitter`, is designed to tell when
5 The first class in this module, :class:`InputSplitter`, is designed to tell when
6 input from a line-oriented frontend is complete and should be executed, and when
6 input from a line-oriented frontend is complete and should be executed, and when
7 the user should be prompted for another line of code instead. The name 'input
7 the user should be prompted for another line of code instead. The name 'input
8 splitter' is largely for historical reasons.
8 splitter' is largely for historical reasons.
9
9
10 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
10 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
11 with full support for the extended IPython syntax (magics, system calls, etc).
11 with full support for the extended IPython syntax (magics, system calls, etc).
12 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
12 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
13 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
13 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
14 and stores the results.
14 and stores the results.
15
15
16 For more details, see the class docstrings below.
16 For more details, see the class docstrings below.
17 """
17 """
18
18
19 from warnings import warn
19 from warnings import warn
20
20
21 warn('IPython.core.inputsplitter is deprecated since IPython 7 in favor of `IPython.core.inputtransformer2`',
21 warn('IPython.core.inputsplitter is deprecated since IPython 7 in favor of `IPython.core.inputtransformer2`',
22 DeprecationWarning)
22 DeprecationWarning)
23
23
24 # Copyright (c) IPython Development Team.
24 # Copyright (c) IPython Development Team.
25 # Distributed under the terms of the Modified BSD License.
25 # Distributed under the terms of the Modified BSD License.
26 import ast
26 import ast
27 import codeop
27 import codeop
28 import io
28 import io
29 import re
29 import re
30 import sys
30 import sys
31 import tokenize
31 import tokenize
32 import warnings
32 import warnings
33
33
34 from typing import List
34 from typing import List
35
35
36 from IPython.core.inputtransformer import (leading_indent,
36 from IPython.core.inputtransformer import (leading_indent,
37 classic_prompt,
37 classic_prompt,
38 ipy_prompt,
38 ipy_prompt,
39 cellmagic,
39 cellmagic,
40 assemble_logical_lines,
40 assemble_logical_lines,
41 help_end,
41 help_end,
42 escaped_commands,
42 escaped_commands,
43 assign_from_magic,
43 assign_from_magic,
44 assign_from_system,
44 assign_from_system,
45 assemble_python_lines,
45 assemble_python_lines,
46 )
46 )
47 from IPython.utils import tokenutil
47 from IPython.utils import tokenutil
48
48
49 # These are available in this module for backwards compatibility.
49 # These are available in this module for backwards compatibility.
50 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
50 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
51 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
51 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
52 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
52 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
53
53
54 #-----------------------------------------------------------------------------
54 #-----------------------------------------------------------------------------
55 # Utilities
55 # Utilities
56 #-----------------------------------------------------------------------------
56 #-----------------------------------------------------------------------------
57
57
58 # FIXME: These are general-purpose utilities that later can be moved to the
58 # FIXME: These are general-purpose utilities that later can be moved to the
59 # general ward. Kept here for now because we're being very strict about test
59 # general ward. Kept here for now because we're being very strict about test
60 # coverage with this code, and this lets us ensure that we keep 100% coverage
60 # coverage with this code, and this lets us ensure that we keep 100% coverage
61 # while developing.
61 # while developing.
62
62
# Compiled regexps for autoindent management.  ``dedent_re`` is a single
# alternation; each alternative matches an indented line holding one of the
# statements listed on the right.
dedent_re = re.compile(
    r'^\s+raise(\s.*)?$'          # raise statement (+ space + other stuff, maybe)
    r'|^\s+raise\([^\)]*\).*$'    # wacky raise with immediate open paren
    r'|^\s+return(\s.*)?$'        # normal return (+ space + other stuff, maybe)
    r'|^\s+return\([^\)]*\).*$'   # wacky return with immediate open paren
    r'|^\s+pass\s*$'              # pass (optionally followed by trailing spaces)
    r'|^\s+break\s*$'             # break (optionally followed by trailing spaces)
    r'|^\s+continue\s*$'          # continue (optionally followed by trailing spaces)
)

# Leading run of blanks (space, tab, CR, FF, VT) at the start of a string.
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# Matches pure comment lines, so that 'if 1:' is not accidentally inserted
# before them.
comment_line_re = re.compile(r'^\s*\#')
78
79
79
def num_ini_spaces(s):
    """Return the number of initial spaces in a string.

    Note that tabs are counted as a single space.  For now, we do *not* support
    mixing of tabs and spaces in the user's input.

    Calling this function emits a ``PendingDeprecationWarning`` attributed to
    the caller (``stacklevel=2``).

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading run of blank characters matched by
        ``ini_spaces_re``, or 0 when the string does not start with one.
    """
    warnings.warn(
        "`num_ini_spaces` is Pending Deprecation since IPython 8.17. "
        "It is considered for removal in a future version. "
        "Please open an issue if you believe it should be kept.",
        stacklevel=2,
        category=PendingDeprecationWarning,
    )
    ini_spaces = ini_spaces_re.match(s)
    if ini_spaces:
        return ini_spaces.end()
    else:
        return 0
100
106
# Fake token types for partial_tokenize; they are numbered from
# tokenize.N_TOKENS upwards.
INCOMPLETE_STRING, IN_MULTILINE_STATEMENT = tokenize.N_TOKENS, tokenize.N_TOKENS + 1


# The 2 classes below have the same API as TokenInfo, but don't try to look up
# a token type name that they won't find.
class IncompleteString:
    """Pseudo-token carrying the text of a string literal that was never closed."""

    type = INCOMPLETE_STRING
    exact_type = INCOMPLETE_STRING

    def __init__(self, s, start, end, line):
        self.s, self.start = s, start
        self.end, self.line = end, line


class InMultilineStatement:
    """Pseudo-token marking that the input stopped inside a multi-line statement."""

    type = IN_MULTILINE_STATEMENT
    exact_type = IN_MULTILINE_STATEMENT

    def __init__(self, pos, line):
        # The marker has no text of its own and zero width: it starts and
        # ends at the same position.
        self.s = ''
        self.start = pos
        self.end = pos
        self.line = line
121
127
def partial_tokens(s):
    """Iterate over tokens from a possibly-incomplete string of code.

    Tokens are yielded as they come from
    ``tokenutil.generate_tokens_catch_errors``.  If tokenizing stops with a
    ``tokenize.TokenError``, one extra pseudo-token is yielded last:
    INCOMPLETE_STRING when the error message mentions 'multi-line string', or
    IN_MULTILINE_STATEMENT when it mentions 'multi-line statement'.  Any other
    ``TokenError`` is re-raised.
    """
    line_reader = io.StringIO(s).readline
    # Placeholder so that ``previous.end`` exists even if the error happens
    # before any real token was produced.
    previous = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    try:
        for previous in tokenutil.generate_tokens_catch_errors(line_reader):
            yield previous
    except tokenize.TokenError as err:
        # catch EOF error
        lines = s.splitlines(keepends=True)
        final_line = lines[-1]
        eof = (len(lines), len(final_line))
        reason = err.args[0]
        if 'multi-line string' in reason:
            # Everything after the last good token belongs to the open string.
            row, col = start = previous.end
            remainder = lines[row - 1][col:] + ''.join(lines[row:])
            yield IncompleteString(remainder, start, eof, final_line)
        elif 'multi-line statement' in reason:
            yield InMultilineStatement(eof, final_line)
        else:
            raise
146
152
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation.

    Parameters
    ----------
    code : string
        The source entered so far; it may stop in the middle of a statement
        or of a string.

    Returns
    -------
    n : int
        Suggested indentation for the following line.  0 is returned when no
        meaningful token is left to base a suggestion on, and when the input
        ends inside a multi-line string.
    """
    tokens = list(partial_tokens(code))
    if tokens and tokens[-1].type == tokenize.ENDMARKER:
        tokens.pop()

    # Drop trailing tokens that say nothing about the last statement.  The
    # ``tokens and`` guard matters: input made only of such tokens would
    # otherwise empty the list and fail on ``tokens[-1]``.
    while tokens and tokens[-1].type in {
        tokenize.DEDENT,
        tokenize.NEWLINE,
        tokenize.COMMENT,
        tokenize.ERRORTOKEN,
    }:
        tokens.pop()

    if not tokens:
        return 0

    # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
    # of input. We need to remove those if we're in a multiline statement
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        while len(tokens) > 1 and tokens[-2].type in {tokenize.NL}:
            tokens.pop(-2)

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before
    prev_indents = [0]

    def _add_indent(n):
        # Record a column only when it differs from the latest one seen.
        if n != prev_indents[-1]:
            prev_indents.append(n)

    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
            _add_indent(tok.end[1])
        elif tok.type == tokenize.NL:
            # After a non-logical newline, the start column of the next
            # token is taken as that line's indentation.
            try:
                _add_indent(next(tokiter).start[1])
            except StopIteration:
                break

    last_indent = prev_indents.pop()

    # If we've just opened a multiline statement (e.g. 'a = ['), indent more
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        if len(tokens) > 1 and tokens[-2].exact_type in {
            tokenize.LPAR,
            tokenize.LSQB,
            tokenize.LBRACE,
        }:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
219
225
220
226
def last_blank(src):
    """Determine if the input source ends in a blank.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise whether the last element of
        ``src.splitlines()`` is empty or whitespace-only.
    """
    if not src:
        return False
    final_line = src.splitlines()[-1]
    return not final_line or final_line.isspace()
234
240
235
241
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)


def last_two_blanks(src):
    """Determine if the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise whether either module-level regexp
        matches the probe text built from the last two lines.
    """
    if not src:
        return False
    # Tricky logic, kept exactly as it was: only the last two lines matter, so
    # they are glued behind a '###\n' stand-in for whatever body preceded them
    # and that probe is tried against both regexps.  If anyone tries to change
    # this logic, make sure to validate the whole test suite first!
    tail = src.splitlines()[-2:]
    probe = '\n'.join(['###\n'] + tail)
    for pattern in (last_two_blanks_re, last_two_blanks_re2):
        if pattern.match(probe):
            return True
    return False
260
266
261
267
def remove_comments(src):
    """Remove all comments from input source.

    Note: comments are NOT recognized inside of strings!

    Parameters
    ----------
    src : string
        A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    # Everything from a '#' up to the end of its line is dropped; the line
    # break itself is kept because '.' does not match a newline.
    comment_pattern = re.compile('#.*')
    return comment_pattern.sub('', src)
278
284
279
285
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # There are strange environments for which sys.stdin.encoding is None (or
    # the attribute is missing altogether).  Fall back to 'ascii' so a valid
    # encoding is always returned.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
290
296
291 #-----------------------------------------------------------------------------
297 #-----------------------------------------------------------------------------
292 # Classes and functions for normal Python syntax handling
298 # Classes and functions for normal Python syntax handling
293 #-----------------------------------------------------------------------------
299 #-----------------------------------------------------------------------------
294
300
295 class InputSplitter(object):
301 class InputSplitter(object):
296 r"""An object that can accumulate lines of Python source before execution.
302 r"""An object that can accumulate lines of Python source before execution.
297
303
298 This object is designed to be fed python source line-by-line, using
304 This object is designed to be fed python source line-by-line, using
299 :meth:`push`. It will return on each push whether the currently pushed
305 :meth:`push`. It will return on each push whether the currently pushed
300 code could be executed already. In addition, it provides a method called
306 code could be executed already. In addition, it provides a method called
301 :meth:`push_accepts_more` that can be used to query whether more input
307 :meth:`push_accepts_more` that can be used to query whether more input
302 can be pushed into a single interactive block.
308 can be pushed into a single interactive block.
303
309
304 This is a simple example of how an interactive terminal-based client can use
310 This is a simple example of how an interactive terminal-based client can use
305 this tool::
311 this tool::
306
312
307 isp = InputSplitter()
313 isp = InputSplitter()
308 while isp.push_accepts_more():
314 while isp.push_accepts_more():
309 indent = ' '*isp.indent_spaces
315 indent = ' '*isp.indent_spaces
310 prompt = '>>> ' + indent
316 prompt = '>>> ' + indent
311 line = indent + raw_input(prompt)
317 line = indent + raw_input(prompt)
312 isp.push(line)
318 isp.push(line)
313 print 'Input source was:\n', isp.source_reset(),
319 print 'Input source was:\n', isp.source_reset(),
314 """
320 """
315 # A cache for storing the current indentation
321 # A cache for storing the current indentation
316 # The first value stores the most recently processed source input
322 # The first value stores the most recently processed source input
317 # The second value is the number of spaces for the current indentation
323 # The second value is the number of spaces for the current indentation
318 # If self.source matches the first value, the second value is a valid
324 # If self.source matches the first value, the second value is a valid
319 # current indentation. Otherwise, the cache is invalid and the indentation
325 # current indentation. Otherwise, the cache is invalid and the indentation
320 # must be recalculated.
326 # must be recalculated.
321 _indent_spaces_cache = None, None
327 _indent_spaces_cache = None, None
322 # String, indicating the default input encoding. It is computed by default
328 # String, indicating the default input encoding. It is computed by default
323 # at initialization time via get_input_encoding(), but it can be reset by a
329 # at initialization time via get_input_encoding(), but it can be reset by a
324 # client with specific knowledge of the encoding.
330 # client with specific knowledge of the encoding.
325 encoding = ''
331 encoding = ''
326 # String where the current full source input is stored, properly encoded.
332 # String where the current full source input is stored, properly encoded.
327 # Reading this attribute is the normal way of querying the currently pushed
333 # Reading this attribute is the normal way of querying the currently pushed
328 # source code, that has been properly encoded.
334 # source code, that has been properly encoded.
329 source = ''
335 source = ''
330 # Code object corresponding to the current source. It is automatically
336 # Code object corresponding to the current source. It is automatically
331 # synced to the source, so it can be queried at any time to obtain the code
337 # synced to the source, so it can be queried at any time to obtain the code
332 # object; it will be None if the source doesn't compile to valid Python.
338 # object; it will be None if the source doesn't compile to valid Python.
333 code = None
339 code = None
334
340
335 # Private attributes
341 # Private attributes
336
342
337 # List with lines of input accumulated so far
343 # List with lines of input accumulated so far
338 _buffer: List[str]
344 _buffer: List[str]
339 # Command compiler
345 # Command compiler
340 _compile: codeop.CommandCompiler
346 _compile: codeop.CommandCompiler
341 # Boolean indicating whether the current block is complete
347 # Boolean indicating whether the current block is complete
342 _is_complete = None
348 _is_complete = None
343 # Boolean indicating whether the current block has an unrecoverable syntax error
349 # Boolean indicating whether the current block has an unrecoverable syntax error
344 _is_invalid = False
350 _is_invalid = False
345
351
def __init__(self) -> None:
    """Create a new InputSplitter instance.

    Starts with an empty line buffer, a fresh ``codeop.CommandCompiler`` and
    the encoding reported by ``get_input_encoding()``.
    """
    self.encoding = get_input_encoding()
    self._compile = codeop.CommandCompiler()
    self._buffer = []
351
357
def reset(self):
    """Reset the input buffer and associated state."""
    # Empty the existing list in place rather than rebinding the attribute.
    del self._buffer[:]
    self.source = ''
    self.code = None
    self._is_complete = False
    self._is_invalid = False
359
365
def source_reset(self):
    """Return the input source and perform a full reset.

    The source is read before ``reset()`` runs, so the caller receives the
    text accumulated up to this call.
    """
    accumulated = self.source
    self.reset()
    return accumulated
366
372
def check_complete(self, source):
    """Return whether a block of code is ready to execute, or should be continued

    This is a non-stateful API, and will reset the state of this InputSplitter.

    Parameters
    ----------
    source : string
        Python input code, which can be multiline.

    Returns
    -------
    status : str
        One of 'complete', 'incomplete', or 'invalid' if source is not a
        prefix of valid code.
    indent_spaces : int or None
        The number of spaces by which to indent the next line of code. If
        status is not 'incomplete', this is None.
    """
    self.reset()
    try:
        try:
            self.push(source)
        except SyntaxError:
            # Transformers in IPythonInputSplitter can raise SyntaxError,
            # which push() will not catch.
            return 'invalid', None

        if self._is_invalid:
            return 'invalid', None
        if self.push_accepts_more():
            return 'incomplete', self.get_indent_spaces()
        return 'complete', None
    finally:
        # Whatever the outcome, leave no state behind.
        self.reset()
402
408
def push(self, lines:str) -> bool:
    """Push one or more lines of input.

    This stores the given lines and returns a status code indicating
    whether the code forms a complete Python block or not.

    Any exceptions generated in compilation are swallowed, but if an
    exception was produced, the method returns True.

    Parameters
    ----------
    lines : string
        One or more lines of Python input.

    Returns
    -------
    is_complete : boolean
        True if the current input source (the result of the current input
        plus prior inputs) forms a complete Python execution block.  Note that
        this value is also stored as a private attribute (``_is_complete``), so it
        can be queried at any time.  False is returned right away when the
        source ends with a backslash-newline.
    """
    assert isinstance(lines, str)
    self._store(lines)
    source = self.source

    # Clear the derived state before compiling, so that an exception raised
    # during compilation cannot leave code/source attributes inconsistent.
    self.code = None
    self._is_complete = None
    self._is_invalid = False

    # Honor termination lines properly
    if source.endswith('\\\n'):
        return False

    # Invalid syntax can produce any of a number of different errors from
    # inside the compiler, so we have to catch them all.  Syntax errors
    # immediately produce a 'ready' block, so the invalid Python can be
    # sent to the kernel for evaluation with possible ipython
    # special-syntax conversion.
    compile_failures = (
        SyntaxError,
        OverflowError,
        ValueError,
        TypeError,
        MemoryError,
        SyntaxWarning,
    )
    try:
        with warnings.catch_warnings():
            # Turn SyntaxWarning into an exception for the compile call.
            warnings.simplefilter('error', SyntaxWarning)
            self.code = self._compile(source, symbol="exec")
    except compile_failures:
        self._is_complete = self._is_invalid = True
    else:
        # Compilation didn't produce any exceptions (though it may not have
        # given a complete code object)
        self._is_complete = self.code is not None

    return self._is_complete
458
464
def push_accepts_more(self):
    """Return whether a block of interactive input can accept more input.

    Intended for line-oriented frontends that have to guess, from the
    lines pushed so far, whether the current block is finished.

    Returns
    -------
    bool
        ``True`` when more input should be requested, ``False`` when the
        block should be handed off.  More input is refused when the input
        is marked complete and either:

        * its last line is empty or whitespace-only, or
        * it is flush left (``get_indent_spaces() == 0``) and it is a
          single line, cannot be parsed by ``ast.parse``, or parses to
          exactly one statement that has no ``body`` attribute.

    No parsing error is propagated to the caller; unparsable flush-left
    input simply yields ``False`` so that the frontend can pass it on.
    """
    # Incomplete input always wants more.  A syntax error also sets
    # _is_complete to True (see push()), so it does not stop here.
    if not self._is_complete:
        return True

    source_lines = self.source.splitlines()

    # A trailing blank line lets the user force execution of any
    # (complete) input.
    final_line = source_lines[-1]
    if final_line.isspace() or not final_line:
        return False

    # Still indented: general fallback, keep accepting code.
    if self.get_indent_spaces() != 0:
        return True

    # Flush left from here on, as after a simple statement such as 'a=1':
    # a single line or a single simple AST node is executed straight away.
    if len(source_lines) <= 1:
        return False

    try:
        tree = ast.parse("".join(self._buffer))
    except Exception:
        # Can't build an AST; stop asking for input.
        return False

    statements = tree.body
    single_simple_statement = (
        len(statements) == 1 and not hasattr(statements[0], 'body')
    )
    return not single_simple_statement
513
519
def get_indent_spaces(self):
    """Return the value of ``find_next_indent`` for the current source.

    The result is memoised in ``self._indent_spaces_cache`` as a
    ``(source, value)`` pair and is recomputed only when ``self.source``
    differs from the cached source.
    """
    cached_source, indent = self._indent_spaces_cache
    if cached_source != self.source:
        # self.source always has a trailing newline, which is dropped
        # before handing the text to find_next_indent.
        indent = find_next_indent(self.source[:-1])
        self._indent_spaces_cache = (self.source, indent)
    return indent
523
529
# Backwards compatibility: read-only ``indent_spaces`` attribute that simply
# calls get_indent_spaces().  I think all code that used .indent_spaces was
# inside IPython, but we can leave this here until IPython 7 in case any
# other modules are using it. -TK, November 2017
# NOTE(review): the module docstring says this module was deprecated in
# IPython 7.0, so the "until IPython 7" horizon has presumably passed --
# confirm before removing.
indent_spaces = property(get_indent_spaces)
528
534
529 def _store(self, lines, buffer=None, store='source'):
535 def _store(self, lines, buffer=None, store='source'):
530 """Store one or more lines of input.
536 """Store one or more lines of input.
531
537
532 If input lines are not newline-terminated, a newline is automatically
538 If input lines are not newline-terminated, a newline is automatically
533 appended."""
539 appended."""
534
540
535 if buffer is None:
541 if buffer is None:
536 buffer = self._buffer
542 buffer = self._buffer
537
543
538 if lines.endswith('\n'):
544 if lines.endswith('\n'):
539 buffer.append(lines)
545 buffer.append(lines)
540 else:
546 else:
541 buffer.append(lines+'\n')
547 buffer.append(lines+'\n')
542 setattr(self, store, self._set_source(buffer))
548 setattr(self, store, self._set_source(buffer))
543
549
544 def _set_source(self, buffer):
550 def _set_source(self, buffer):
545 return u''.join(buffer)
551 return u''.join(buffer)
546
552
547
553
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax.

    Each pushed line goes through three groups of transformers (physical
    line, logical line, Python line) before the result is handed to
    :class:`InputSplitter`.  The raw, untransformed text is kept alongside
    in ``source_raw``.
    """

    # String with raw, untransformed input.
    source_raw = ''

    # True while a transformer is holding input it has not given back yet.
    transformer_accumulating = False

    # True while assemble_python_lines is holding input it has not given
    # back yet.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=True, physical_line_transforms=None,
                 logical_line_transforms=None, python_line_transforms=None):
        """Create the splitter and its transformer pipeline.

        Parameters
        ----------
        line_input_checker : bool
            Forwarded to ``cellmagic`` as ``end_on_blank_line`` when the
            default physical line transformers are built.
        physical_line_transforms, logical_line_transforms, python_line_transforms : list, optional
            Replacement transformer lists; ``None`` selects the defaults.
        """
        super().__init__()
        self._buffer_raw = []
        self._validate = True

        if physical_line_transforms is None:
            physical_line_transforms = [
                leading_indent(),
                classic_prompt(),
                ipy_prompt(),
                cellmagic(end_on_blank_line=line_input_checker),
            ]
        self.physical_line_transforms = physical_line_transforms

        self.assemble_logical_lines = assemble_logical_lines()
        if logical_line_transforms is None:
            logical_line_transforms = [
                help_end(),
                escaped_commands(),
                assign_from_magic(),
                assign_from_system(),
            ]
        self.logical_line_transforms = logical_line_transforms

        self.assemble_python_lines = assemble_python_lines()
        if python_line_transforms is None:
            # We don't use any of these at present
            python_line_transforms = []
        self.python_line_transforms = python_line_transforms

    @property
    def transforms(self):
        """Quick access to all transformers, in pipeline order."""
        logical_stage = [self.assemble_logical_lines] + self.logical_line_transforms
        python_stage = [self.assemble_python_lines] + self.python_line_transforms
        return self.physical_line_transforms + logical_stage + python_stage

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        python_stage = [self.assemble_python_lines] + self.python_line_transforms
        if self.within_python_line:
            return self.physical_line_transforms + python_stage
        logical_stage = [self.assemble_logical_lines] + self.logical_line_transforms
        return self.physical_line_transforms + logical_stage + python_stage

    def reset(self):
        """Reset the input buffer and associated state."""
        super().reset()
        self._buffer_raw.clear()
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False

        for transformer in self.transforms:
            try:
                transformer.reset()
            except SyntaxError:
                # Nothing that calls reset() expects to handle transformer
                # errors
                pass

    def flush_transformers(self):
        """Drain every transformer in use and store whatever comes out.

        The transformers are chained: the output of one is fed, one line
        at a time, into the next, and each is reset once its input is
        exhausted.  Any resulting text is appended to the source buffer.
        """
        def _drain(transform, pending):
            """Yield transformed lines; always strings, never None.

            transform: the current transform
            pending: an iterable of previously transformed inputs.
                     Each may be multiline, which will be passed
                     one line at a time to transform.
            """
            for chunk in pending:
                for line in chunk.splitlines():
                    # push one line at a time
                    result = transform.push(line)
                    if result is not None:
                        yield result

            # reset the transform, keeping anything it was still holding
            leftover = transform.reset()
            if leftover is not None:
                yield leftover

        pipeline = []
        for transform in self.transforms_in_use:
            pipeline = _drain(transform, pipeline)

        flushed = list(pipeline)
        if flushed:
            self._store('\n'.join(flushed))

    def raw_reset(self):
        """Return raw input only and perform a full reset."""
        raw = self.source_raw
        self.reset()
        return raw

    def source_reset(self):
        """Flush the transformers, return the source, and always reset."""
        try:
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push_accepts_more(self):
        """Accept more input while a transformer is accumulating; otherwise
        defer to the parent class."""
        if self.transformer_accumulating:
            return True
        return super().push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.

        The splitter is reset before the cell is pushed and again
        afterwards, whether or not an exception occurred.
        """
        self.reset()
        try:
            self.push(cell)
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push(self, lines:str) -> bool:
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
            One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
            True if the current input source (the result of the current input
            plus prior inputs) forms a complete Python execution block.  Note that
            this value is also stored as a private attribute (_is_complete), so it
            can be queried at any time.  False is returned when every line was
            held back by the transformers.
        """
        assert isinstance(lines, str)
        # ''.splitlines() --> [], but we need to push the empty line to
        # transformers
        pending_lines = lines.splitlines() or ['']

        # Store raw source before applying any transformations to it.
        self._store(lines, self._buffer_raw, 'source_raw')

        outputs = (self._transform_line(line) for line in pending_lines)
        transformed = [text for text in outputs if text is not None]

        if not transformed:
            # Got nothing back from transformers - they must be waiting for
            # more input.
            return False
        return super().push('\n'.join(transformed))

    def _transform_line(self, line):
        """Push a line of input code through the various transformers.

        Returns any output from the transformers, or None if a transformer
        is accumulating lines.

        Sets self.transformer_accumulating as a side effect, and updates
        self.within_python_line whenever assemble_python_lines is reached.
        """
        def _feed(stages, text):
            # Pass text through each stage in turn; stop at the first stage
            # that holds on to it (signalled by a None result).
            for stage in stages:
                text = stage.push(text)
                if text is None:
                    break
            return text

        line = _feed(self.physical_line_transforms, line)

        # Logical line handling is skipped while inside a Python line.
        if line is not None and not self.within_python_line:
            logical_stages = [self.assemble_logical_lines,
                              *self.logical_line_transforms]
            line = _feed(logical_stages, line)

        if line is not None:
            line = self.assemble_python_lines.push(line)
            self.within_python_line = line is None
            if line is not None:
                line = _feed(self.python_line_transforms, line)

        self.transformer_accumulating = line is None
        return line
787
793
@@ -1,752 +1,782 b''
1 # encoding: utf-8
1 # encoding: utf-8
2 """
2 """
3 Utilities for working with strings and text.
3 Utilities for working with strings and text.
4
4
5 Inheritance diagram:
5 Inheritance diagram:
6
6
7 .. inheritance-diagram:: IPython.utils.text
7 .. inheritance-diagram:: IPython.utils.text
8 :parts: 3
8 :parts: 3
9 """
9 """
10
10
11 import os
11 import os
12 import re
12 import re
13 import string
13 import string
14 import sys
14 import sys
15 import textwrap
15 import textwrap
16 import warnings
16 from string import Formatter
17 from string import Formatter
17 from pathlib import Path
18 from pathlib import Path
18
19
19
20
20 # datetime.strftime date format for ipython
21 if sys.platform == 'win32':
22 date_format = "%B %d, %Y"
23 else:
24 date_format = "%B %-d, %Y"
25
class LSString(str):
    """String derivative with special access attributes.

    These are normal strings, but with the special attributes:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value as a string with newlines replaced by spaces.
        .p (or .paths): list of ``pathlib.Path`` objects, one for each
                        line that names an existing path.

    The list, space-separated and path forms are computed on first access
    and cached on the instance.

    Such strings are very useful to efficiently interact with the shell, which
    typically only understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the value split on newlines (cached)."""
        try:
            cached = self.__list
        except AttributeError:
            cached = self.__list = self.split('\n')
        return cached

    l = list = property(get_list)

    def get_spstr(self):
        """Return the value with every newline replaced by a space (cached)."""
        try:
            cached = self.__spstr
        except AttributeError:
            cached = self.__spstr = self.replace('\n',' ')
        return cached

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the lines that exist on disk (cached)."""
        try:
            cached = self.__paths
        except AttributeError:
            existing = [Path(p) for p in self.split('\n') if os.path.exists(p)]
            cached = self.__paths = existing
        return cached

    p = paths = property(get_paths)
73
68
74 # FIXME: We need to reimplement type specific displayhook and then add this
69 # FIXME: We need to reimplement type specific displayhook and then add this
75 # back as a custom printer. This should also be moved outside utils into the
70 # back as a custom printer. This should also be moved outside utils into the
76 # core.
71 # core.
77
72
78 # def print_lsstring(arg):
73 # def print_lsstring(arg):
79 # """ Prettier (non-repr-like) and more informative printer for LSString """
74 # """ Prettier (non-repr-like) and more informative printer for LSString """
80 # print "LSString (.p, .n, .l, .s available). Value:"
75 # print "LSString (.p, .n, .l, .s available). Value:"
81 # print arg
76 # print arg
82 #
77 #
83 #
78 #
84 # print_lsstring = result_display.register(LSString)(print_lsstring)
79 # print_lsstring = result_display.register(LSString)(print_lsstring)
85
80
86
81
class SList(list):
    """List derivative with special access attributes.

    These are normal lists, but with the special attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of ``pathlib.Path`` objects, one for each
      element that names an existing path.

    The joined strings and the path list are computed on first access and
    cached on the instance."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the elements joined with single spaces (cached)."""
        try:
            cached = self.__spstr
        except AttributeError:
            cached = self.__spstr = ' '.join(self)
        return cached

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the elements joined with newlines (cached)."""
        try:
            cached = self.__nlstr
        except AttributeError:
            cached = self.__nlstr = '\n'.join(self)
        return cached

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the elements that exist on disk (cached)."""
        try:
            cached = self.__paths
        except AttributeError:
            cached = self.__paths = [Path(p) for p in self if os.path.exists(p)]
        return cached

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched for case-insensitively; a callable is
        used as the predicate as-is.  If prune is true, return all items
        NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field; items lacking that field are matched
        against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """
        if isinstance(pattern, str):
            def matches(text):
                return re.search(pattern, text, re.IGNORECASE)
        else:
            matches = pattern

        def target_of(entry):
            # Text the predicate is applied to: the whole entry, or one of
            # its whitespace-separated fields.
            if field is None:
                return entry
            columns = entry.split()
            try:
                return columns[field]
            except IndexError:
                return ""

        keep_when_matching = not prune
        return SList([
            entry for entry in self
            if bool(matches(target_of(entry))) == keep_when_matching
        ])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored; a line that yields none of the requested
        fields is left out of the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if not fields:
            return [entry.split() for entry in self]

        def pick(columns):
            # Yield the requested columns that exist, in the order asked for.
            for index in fields:
                try:
                    yield columns[index]
                except IndexError:
                    continue

        collected = SList()
        for columns in [entry.split() for entry in self]:
            chosen = [*pick(columns)]
            if chosen:
                collected.append(" ".join(chosen))

        return collected

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Returns a new SList; the list itself is not reordered.  Lines with
        equal keys are ordered by the lines themselves.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """
        def sort_key(line):
            key = line if field is None else SList([line]).fields(field)
            if not nums:
                return key
            # Numeric key: keep the items of the key that are all digits;
            # anything that cannot be converted counts as 0.
            digits = "".join([ch for ch in key if ch.isdigit()])
            try:
                return int(digits)
            except ValueError:
                return 0

        # decorate, sort, undecorate
        decorated = [(sort_key(line), line) for line in self]
        decorated.sort()
        return SList([line for _, line in decorated])
231
226
232
227
233 # FIXME: We need to reimplement type specific displayhook and then add this
228 # FIXME: We need to reimplement type specific displayhook and then add this
234 # back as a custom printer. This should also be moved outside utils into the
229 # back as a custom printer. This should also be moved outside utils into the
235 # core.
230 # core.
236
231
237 # def print_slist(arg):
232 # def print_slist(arg):
238 # """ Prettier (non-repr-like) and more informative printer for SList """
233 # """ Prettier (non-repr-like) and more informative printer for SList """
239 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
234 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
240 # if hasattr(arg, 'hideonce') and arg.hideonce:
235 # if hasattr(arg, 'hideonce') and arg.hideonce:
241 # arg.hideonce = False
236 # arg.hideonce = False
242 # return
237 # return
243 #
238 #
244 # nlprint(arg) # This was a nested list printer, now removed.
239 # nlprint(arg) # This was a nested list printer, now removed.
245 #
240 #
246 # print_slist = result_display.register(SList)(print_slist)
241 # print_slist = result_display.register(SList)(print_slist)
247
242
248
243
249 def indent(instr,nspaces=4, ntabs=0, flatten=False):
244 def indent(instr,nspaces=4, ntabs=0, flatten=False):
250 """Indent a string a given number of spaces or tabstops.
245 """Indent a string a given number of spaces or tabstops.
251
246
252 indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.
247 indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.
253
248
254 Parameters
249 Parameters
255 ----------
250 ----------
256 instr : basestring
251 instr : basestring
257 The string to be indented.
252 The string to be indented.
258 nspaces : int (default: 4)
253 nspaces : int (default: 4)
259 The number of spaces to be indented.
254 The number of spaces to be indented.
260 ntabs : int (default: 0)
255 ntabs : int (default: 0)
261 The number of tabs to be indented.
256 The number of tabs to be indented.
262 flatten : bool (default: False)
257 flatten : bool (default: False)
263 Whether to scrub existing indentation. If True, all lines will be
258 Whether to scrub existing indentation. If True, all lines will be
264 aligned to the same indentation. If False, existing indentation will
259 aligned to the same indentation. If False, existing indentation will
265 be strictly increased.
260 be strictly increased.
266
261
267 Returns
262 Returns
268 -------
263 -------
269 str|unicode : string indented by ntabs and nspaces.
264 str|unicode : string indented by ntabs and nspaces.
270
265
271 """
266 """
272 if instr is None:
267 if instr is None:
273 return
268 return
274 ind = '\t'*ntabs+' '*nspaces
269 ind = '\t'*ntabs+' '*nspaces
275 if flatten:
270 if flatten:
276 pat = re.compile(r'^\s*', re.MULTILINE)
271 pat = re.compile(r'^\s*', re.MULTILINE)
277 else:
272 else:
278 pat = re.compile(r'^', re.MULTILINE)
273 pat = re.compile(r'^', re.MULTILINE)
279 outstr = re.sub(pat, ind, instr)
274 outstr = re.sub(pat, ind, instr)
280 if outstr.endswith(os.linesep+ind):
275 if outstr.endswith(os.linesep+ind):
281 return outstr[:-len(ind)]
276 return outstr[:-len(ind)]
282 else:
277 else:
283 return outstr
278 return outstr
284
279
285
280
286 def list_strings(arg):
281 def list_strings(arg):
287 """Always return a list of strings, given a string or list of strings
282 """Always return a list of strings, given a string or list of strings
288 as input.
283 as input.
289
284
290 Examples
285 Examples
291 --------
286 --------
292 ::
287 ::
293
288
294 In [7]: list_strings('A single string')
289 In [7]: list_strings('A single string')
295 Out[7]: ['A single string']
290 Out[7]: ['A single string']
296
291
297 In [8]: list_strings(['A single string in a list'])
292 In [8]: list_strings(['A single string in a list'])
298 Out[8]: ['A single string in a list']
293 Out[8]: ['A single string in a list']
299
294
300 In [9]: list_strings(['A','list','of','strings'])
295 In [9]: list_strings(['A','list','of','strings'])
301 Out[9]: ['A', 'list', 'of', 'strings']
296 Out[9]: ['A', 'list', 'of', 'strings']
302 """
297 """
303
298
304 if isinstance(arg, str):
299 if isinstance(arg, str):
305 return [arg]
300 return [arg]
306 else:
301 else:
307 return arg
302 return arg
308
303
309
304
310 def marquee(txt='',width=78,mark='*'):
305 def marquee(txt='',width=78,mark='*'):
311 """Return the input string centered in a 'marquee'.
306 """Return the input string centered in a 'marquee'.
312
307
313 Examples
308 Examples
314 --------
309 --------
315 ::
310 ::
316
311
317 In [16]: marquee('A test',40)
312 In [16]: marquee('A test',40)
318 Out[16]: '**************** A test ****************'
313 Out[16]: '**************** A test ****************'
319
314
320 In [17]: marquee('A test',40,'-')
315 In [17]: marquee('A test',40,'-')
321 Out[17]: '---------------- A test ----------------'
316 Out[17]: '---------------- A test ----------------'
322
317
323 In [18]: marquee('A test',40,' ')
318 In [18]: marquee('A test',40,' ')
324 Out[18]: ' A test '
319 Out[18]: ' A test '
325
320
326 """
321 """
327 if not txt:
322 if not txt:
328 return (mark*width)[:width]
323 return (mark*width)[:width]
329 nmark = (width-len(txt)-2)//len(mark)//2
324 nmark = (width-len(txt)-2)//len(mark)//2
330 if nmark < 0: nmark =0
325 if nmark < 0: nmark =0
331 marks = mark*nmark
326 marks = mark*nmark
332 return '%s %s %s' % (marks,txt,marks)
327 return '%s %s %s' % (marks,txt,marks)
333
328
334
329
335 ini_spaces_re = re.compile(r'^(\s+)')
330 ini_spaces_re = re.compile(r'^(\s+)')
336
331
337 def num_ini_spaces(strng):
332 def num_ini_spaces(strng):
338 """Return the number of initial spaces in a string"""
333 """Return the number of initial spaces in a string"""
339
334 warnings.warn(
335 "`num_ini_spaces` is Pending Deprecation since IPython 8.17. "
336 "It is considered for removal in a future version. "
337 "Please open an issue if you believe it should be kept.",
338 stacklevel=2,
339 category=PendingDeprecationWarning,
340 )
340 ini_spaces = ini_spaces_re.match(strng)
341 ini_spaces = ini_spaces_re.match(strng)
341 if ini_spaces:
342 if ini_spaces:
342 return ini_spaces.end()
343 return ini_spaces.end()
343 else:
344 else:
344 return 0
345 return 0
345
346
346
347
347 def format_screen(strng):
348 def format_screen(strng):
348 """Format a string for screen printing.
349 """Format a string for screen printing.
349
350
350 This removes some latex-type format codes."""
351 This removes some latex-type format codes."""
351 # Paragraph continue
352 # Paragraph continue
352 par_re = re.compile(r'\\$',re.MULTILINE)
353 par_re = re.compile(r'\\$',re.MULTILINE)
353 strng = par_re.sub('',strng)
354 strng = par_re.sub('',strng)
354 return strng
355 return strng
355
356
356
357
357 def dedent(text):
358 def dedent(text):
358 """Equivalent of textwrap.dedent that ignores unindented first line.
359 """Equivalent of textwrap.dedent that ignores unindented first line.
359
360
360 This means it will still dedent strings like:
361 This means it will still dedent strings like:
361 '''foo
362 '''foo
362 is a bar
363 is a bar
363 '''
364 '''
364
365
365 For use in wrap_paragraphs.
366 For use in wrap_paragraphs.
366 """
367 """
367
368
368 if text.startswith('\n'):
369 if text.startswith('\n'):
369 # text starts with blank line, don't ignore the first line
370 # text starts with blank line, don't ignore the first line
370 return textwrap.dedent(text)
371 return textwrap.dedent(text)
371
372
372 # split first line
373 # split first line
373 splits = text.split('\n',1)
374 splits = text.split('\n',1)
374 if len(splits) == 1:
375 if len(splits) == 1:
375 # only one line
376 # only one line
376 return textwrap.dedent(text)
377 return textwrap.dedent(text)
377
378
378 first, rest = splits
379 first, rest = splits
379 # dedent everything but the first line
380 # dedent everything but the first line
380 rest = textwrap.dedent(rest)
381 rest = textwrap.dedent(rest)
381 return '\n'.join([first, rest])
382 return '\n'.join([first, rest])
382
383
383
384
384 def wrap_paragraphs(text, ncols=80):
385 def wrap_paragraphs(text, ncols=80):
385 """Wrap multiple paragraphs to fit a specified width.
386 """Wrap multiple paragraphs to fit a specified width.
386
387
387 This is equivalent to textwrap.wrap, but with support for multiple
388 This is equivalent to textwrap.wrap, but with support for multiple
388 paragraphs, as separated by empty lines.
389 paragraphs, as separated by empty lines.
389
390
390 Returns
391 Returns
391 -------
392 -------
392 list of complete paragraphs, wrapped to fill `ncols` columns.
393 list of complete paragraphs, wrapped to fill `ncols` columns.
393 """
394 """
395 warnings.warn(
396 "`wrap_paragraphs` is Pending Deprecation since IPython 8.17. "
397 "It is considered for removal in a future version. "
398 "Please open an issue if you believe it should be kept.",
399 stacklevel=2,
400 category=PendingDeprecationWarning,
401 )
394 paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
402 paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
395 text = dedent(text).strip()
403 text = dedent(text).strip()
396 paragraphs = paragraph_re.split(text)[::2] # every other entry is space
404 paragraphs = paragraph_re.split(text)[::2] # every other entry is space
397 out_ps = []
405 out_ps = []
398 indent_re = re.compile(r'\n\s+', re.MULTILINE)
406 indent_re = re.compile(r'\n\s+', re.MULTILINE)
399 for p in paragraphs:
407 for p in paragraphs:
400 # presume indentation that survives dedent is meaningful formatting,
408 # presume indentation that survives dedent is meaningful formatting,
401 # so don't fill unless text is flush.
409 # so don't fill unless text is flush.
402 if indent_re.search(p) is None:
410 if indent_re.search(p) is None:
403 # wrap paragraph
411 # wrap paragraph
404 p = textwrap.fill(p, ncols)
412 p = textwrap.fill(p, ncols)
405 out_ps.append(p)
413 out_ps.append(p)
406 return out_ps
414 return out_ps
407
415
408
416
409 def strip_email_quotes(text):
417 def strip_email_quotes(text):
410 """Strip leading email quotation characters ('>').
418 """Strip leading email quotation characters ('>').
411
419
412 Removes any combination of leading '>' interspersed with whitespace that
420 Removes any combination of leading '>' interspersed with whitespace that
413 appears *identically* in all lines of the input text.
421 appears *identically* in all lines of the input text.
414
422
415 Parameters
423 Parameters
416 ----------
424 ----------
417 text : str
425 text : str
418
426
419 Examples
427 Examples
420 --------
428 --------
421
429
422 Simple uses::
430 Simple uses::
423
431
424 In [2]: strip_email_quotes('> > text')
432 In [2]: strip_email_quotes('> > text')
425 Out[2]: 'text'
433 Out[2]: 'text'
426
434
427 In [3]: strip_email_quotes('> > text\\n> > more')
435 In [3]: strip_email_quotes('> > text\\n> > more')
428 Out[3]: 'text\\nmore'
436 Out[3]: 'text\\nmore'
429
437
430 Note how only the common prefix that appears in all lines is stripped::
438 Note how only the common prefix that appears in all lines is stripped::
431
439
432 In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
440 In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
433 Out[4]: '> text\\n> more\\nmore...'
441 Out[4]: '> text\\n> more\\nmore...'
434
442
435 So if any line has no quote marks ('>'), then none are stripped from any
443 So if any line has no quote marks ('>'), then none are stripped from any
436 of them ::
444 of them ::
437
445
438 In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
446 In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
439 Out[5]: '> > text\\n> > more\\nlast different'
447 Out[5]: '> > text\\n> > more\\nlast different'
440 """
448 """
441 lines = text.splitlines()
449 lines = text.splitlines()
442 strip_len = 0
450 strip_len = 0
443
451
444 for characters in zip(*lines):
452 for characters in zip(*lines):
445 # Check if all characters in this position are the same
453 # Check if all characters in this position are the same
446 if len(set(characters)) > 1:
454 if len(set(characters)) > 1:
447 break
455 break
448 prefix_char = characters[0]
456 prefix_char = characters[0]
449
457
450 if prefix_char in string.whitespace or prefix_char == ">":
458 if prefix_char in string.whitespace or prefix_char == ">":
451 strip_len += 1
459 strip_len += 1
452 else:
460 else:
453 break
461 break
454
462
455 text = "\n".join([ln[strip_len:] for ln in lines])
463 text = "\n".join([ln[strip_len:] for ln in lines])
456 return text
464 return text
457
465
458
466
459 def strip_ansi(source):
467 def strip_ansi(source):
460 """
468 """
461 Remove ansi escape codes from text.
469 Remove ansi escape codes from text.
462
470
463 Parameters
471 Parameters
464 ----------
472 ----------
465 source : str
473 source : str
466 Source to remove the ansi from
474 Source to remove the ansi from
467 """
475 """
476 warnings.warn(
477 "`strip_ansi` is Pending Deprecation since IPython 8.17. "
478 "It is considered for removal in a future version. "
479 "Please open an issue if you believe it should be kept.",
480 stacklevel=2,
481 category=PendingDeprecationWarning,
482 )
483
468 return re.sub(r'\033\[(\d|;)+?m', '', source)
484 return re.sub(r'\033\[(\d|;)+?m', '', source)
469
485
470
486
471 class EvalFormatter(Formatter):
487 class EvalFormatter(Formatter):
472 """A String Formatter that allows evaluation of simple expressions.
488 """A String Formatter that allows evaluation of simple expressions.
473
489
474 Note that this version interprets a `:` as specifying a format string (as per
490 Note that this version interprets a `:` as specifying a format string (as per
475 standard string formatting), so if slicing is required, you must explicitly
491 standard string formatting), so if slicing is required, you must explicitly
476 create a slice.
492 create a slice.
477
493
478 This is to be used in templating cases, such as the parallel batch
494 This is to be used in templating cases, such as the parallel batch
479 script templates, where simple arithmetic on arguments is useful.
495 script templates, where simple arithmetic on arguments is useful.
480
496
481 Examples
497 Examples
482 --------
498 --------
483 ::
499 ::
484
500
485 In [1]: f = EvalFormatter()
501 In [1]: f = EvalFormatter()
486 In [2]: f.format('{n//4}', n=8)
502 In [2]: f.format('{n//4}', n=8)
487 Out[2]: '2'
503 Out[2]: '2'
488
504
489 In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
505 In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
490 Out[3]: 'll'
506 Out[3]: 'll'
491 """
507 """
492 def get_field(self, name, args, kwargs):
508 def get_field(self, name, args, kwargs):
493 v = eval(name, kwargs)
509 v = eval(name, kwargs)
494 return v, name
510 return v, name
495
511
496 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
512 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
497 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
513 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
498 # above, it should be possible to remove FullEvalFormatter.
514 # above, it should be possible to remove FullEvalFormatter.
499
515
500 class FullEvalFormatter(Formatter):
516 class FullEvalFormatter(Formatter):
501 """A String Formatter that allows evaluation of simple expressions.
517 """A String Formatter that allows evaluation of simple expressions.
502
518
503 Any time a format key is not found in the kwargs,
519 Any time a format key is not found in the kwargs,
504 it will be tried as an expression in the kwargs namespace.
520 it will be tried as an expression in the kwargs namespace.
505
521
506 Note that this version allows slicing using [1:2], so you cannot specify
522 Note that this version allows slicing using [1:2], so you cannot specify
507 a format string. Use :class:`EvalFormatter` to permit format strings.
523 a format string. Use :class:`EvalFormatter` to permit format strings.
508
524
509 Examples
525 Examples
510 --------
526 --------
511 ::
527 ::
512
528
513 In [1]: f = FullEvalFormatter()
529 In [1]: f = FullEvalFormatter()
514 In [2]: f.format('{n//4}', n=8)
530 In [2]: f.format('{n//4}', n=8)
515 Out[2]: '2'
531 Out[2]: '2'
516
532
517 In [3]: f.format('{list(range(5))[2:4]}')
533 In [3]: f.format('{list(range(5))[2:4]}')
518 Out[3]: '[2, 3]'
534 Out[3]: '[2, 3]'
519
535
520 In [4]: f.format('{3*2}')
536 In [4]: f.format('{3*2}')
521 Out[4]: '6'
537 Out[4]: '6'
522 """
538 """
523 # copied from Formatter._vformat with minor changes to allow eval
539 # copied from Formatter._vformat with minor changes to allow eval
524 # and replace the format_spec code with slicing
540 # and replace the format_spec code with slicing
525 def vformat(self, format_string:str, args, kwargs)->str:
541 def vformat(self, format_string:str, args, kwargs)->str:
526 result = []
542 result = []
527 for literal_text, field_name, format_spec, conversion in \
543 for literal_text, field_name, format_spec, conversion in \
528 self.parse(format_string):
544 self.parse(format_string):
529
545
530 # output the literal text
546 # output the literal text
531 if literal_text:
547 if literal_text:
532 result.append(literal_text)
548 result.append(literal_text)
533
549
534 # if there's a field, output it
550 # if there's a field, output it
535 if field_name is not None:
551 if field_name is not None:
536 # this is some markup, find the object and do
552 # this is some markup, find the object and do
537 # the formatting
553 # the formatting
538
554
539 if format_spec:
555 if format_spec:
540 # override format spec, to allow slicing:
556 # override format spec, to allow slicing:
541 field_name = ':'.join([field_name, format_spec])
557 field_name = ':'.join([field_name, format_spec])
542
558
543 # eval the contents of the field for the object
559 # eval the contents of the field for the object
544 # to be formatted
560 # to be formatted
545 obj = eval(field_name, kwargs)
561 obj = eval(field_name, kwargs)
546
562
547 # do any conversion on the resulting object
563 # do any conversion on the resulting object
548 obj = self.convert_field(obj, conversion)
564 obj = self.convert_field(obj, conversion)
549
565
550 # format the object and append to the result
566 # format the object and append to the result
551 result.append(self.format_field(obj, ''))
567 result.append(self.format_field(obj, ''))
552
568
553 return ''.join(result)
569 return ''.join(result)
554
570
555
571
556 class DollarFormatter(FullEvalFormatter):
572 class DollarFormatter(FullEvalFormatter):
557 """Formatter allowing Itpl style $foo replacement, for names and attribute
573 """Formatter allowing Itpl style $foo replacement, for names and attribute
558 access only. Standard {foo} replacement also works, and allows full
574 access only. Standard {foo} replacement also works, and allows full
559 evaluation of its arguments.
575 evaluation of its arguments.
560
576
561 Examples
577 Examples
562 --------
578 --------
563 ::
579 ::
564
580
565 In [1]: f = DollarFormatter()
581 In [1]: f = DollarFormatter()
566 In [2]: f.format('{n//4}', n=8)
582 In [2]: f.format('{n//4}', n=8)
567 Out[2]: '2'
583 Out[2]: '2'
568
584
569 In [3]: f.format('23 * 76 is $result', result=23*76)
585 In [3]: f.format('23 * 76 is $result', result=23*76)
570 Out[3]: '23 * 76 is 1748'
586 Out[3]: '23 * 76 is 1748'
571
587
572 In [4]: f.format('$a or {b}', a=1, b=2)
588 In [4]: f.format('$a or {b}', a=1, b=2)
573 Out[4]: '1 or 2'
589 Out[4]: '1 or 2'
574 """
590 """
575 _dollar_pattern_ignore_single_quote = re.compile(r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)")
591 _dollar_pattern_ignore_single_quote = re.compile(r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)")
576 def parse(self, fmt_string):
592 def parse(self, fmt_string):
577 for literal_txt, field_name, format_spec, conversion \
593 for literal_txt, field_name, format_spec, conversion \
578 in Formatter.parse(self, fmt_string):
594 in Formatter.parse(self, fmt_string):
579
595
580 # Find $foo patterns in the literal text.
596 # Find $foo patterns in the literal text.
581 continue_from = 0
597 continue_from = 0
582 txt = ""
598 txt = ""
583 for m in self._dollar_pattern_ignore_single_quote.finditer(literal_txt):
599 for m in self._dollar_pattern_ignore_single_quote.finditer(literal_txt):
584 new_txt, new_field = m.group(1,2)
600 new_txt, new_field = m.group(1,2)
585 # $$foo --> $foo
601 # $$foo --> $foo
586 if new_field.startswith("$"):
602 if new_field.startswith("$"):
587 txt += new_txt + new_field
603 txt += new_txt + new_field
588 else:
604 else:
589 yield (txt + new_txt, new_field, "", None)
605 yield (txt + new_txt, new_field, "", None)
590 txt = ""
606 txt = ""
591 continue_from = m.end()
607 continue_from = m.end()
592
608
593 # Re-yield the {foo} style pattern
609 # Re-yield the {foo} style pattern
594 yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
610 yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
595
611
596 def __repr__(self):
612 def __repr__(self):
597 return "<DollarFormatter>"
613 return "<DollarFormatter>"
598
614
599 #-----------------------------------------------------------------------------
615 #-----------------------------------------------------------------------------
600 # Utils to columnize a list of string
600 # Utils to columnize a list of strings
616 # Utils to columnize a list of strings
617 #-----------------------------------------------------------------------------
602
618
603 def _col_chunks(l, max_rows, row_first=False):
619 def _col_chunks(l, max_rows, row_first=False):
604 """Yield successive max_rows-sized column chunks from l."""
620 """Yield successive max_rows-sized column chunks from l."""
605 if row_first:
621 if row_first:
606 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)
622 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)
607 for i in range(ncols):
623 for i in range(ncols):
608 yield [l[j] for j in range(i, len(l), ncols)]
624 yield [l[j] for j in range(i, len(l), ncols)]
609 else:
625 else:
610 for i in range(0, len(l), max_rows):
626 for i in range(0, len(l), max_rows):
611 yield l[i:(i + max_rows)]
627 yield l[i:(i + max_rows)]
612
628
613
629
614 def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
630 def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
615 """Calculate optimal info to columnize a list of string"""
631 """Calculate optimal info to columnize a list of string"""
616 for max_rows in range(1, len(rlist) + 1):
632 for max_rows in range(1, len(rlist) + 1):
617 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
633 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
618 sumlength = sum(col_widths)
634 sumlength = sum(col_widths)
619 ncols = len(col_widths)
635 ncols = len(col_widths)
620 if sumlength + separator_size * (ncols - 1) <= displaywidth:
636 if sumlength + separator_size * (ncols - 1) <= displaywidth:
621 break
637 break
622 return {'num_columns': ncols,
638 return {'num_columns': ncols,
623 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,
639 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,
624 'max_rows': max_rows,
640 'max_rows': max_rows,
625 'column_widths': col_widths
641 'column_widths': col_widths
626 }
642 }
627
643
628
644
629 def _get_or_default(mylist, i, default=None):
629 def _get_or_default(mylist, i, default=None):
645 def _get_or_default(mylist, i, default=None):
630 """Return the list item at index i, or default if it does not exist."""
646 """Return the list item at index i, or default if it does not exist."""
647 if i >= len(mylist):
632 return default
648 return default
633 else :
649 else :
634 return mylist[i]
650 return mylist[i]
635
651
636
652
637 def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs) :
653 def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs) :
638 """Returns a nested list, and info to columnize items
654 """Returns a nested list, and info to columnize items
639
655
640 Parameters
656 Parameters
641 ----------
657 ----------
642 items
658 items
643 list of strings to columize
659 list of strings to columize
644 row_first : (default False)
660 row_first : (default False)
645 Whether to compute columns for a row-first matrix instead of
661 Whether to compute columns for a row-first matrix instead of
646 column-first (default).
662 column-first (default).
647 empty : (default None)
663 empty : (default None)
648 default value to fill list if needed
664 default value to fill list if needed
649 separator_size : int (default=2)
665 separator_size : int (default=2)
650 How much characters will be used as a separation between each columns.
650 How many characters will be used as separation between columns.
666 How many characters will be used as separation between columns.
667 displaywidth : int (default=80)
652 The width of the area onto which the columns should enter
668 The width of the area onto which the columns should enter
653
669
654 Returns
670 Returns
655 -------
671 -------
656 strings_matrix
672 strings_matrix
657 nested list of string, the outer most list contains as many list as
673 nested list of string, the outer most list contains as many list as
658 rows, the innermost lists have each as many element as columns. If the
674 rows, the innermost lists have each as many element as columns. If the
659 total number of elements in `items` does not equal the product of
675 total number of elements in `items` does not equal the product of
660 rows*columns, the last element of some lists are filled with `None`.
676 rows*columns, the last element of some lists are filled with `None`.
661 dict_info
677 dict_info
662 some info to make columnize easier:
678 some info to make columnize easier:
663
679
664 num_columns
680 num_columns
665 number of columns
681 number of columns
666 max_rows
682 max_rows
667 maximum number of rows (final number may be less)
683 maximum number of rows (final number may be less)
668 column_widths
684 column_widths
669 list of with of each columns
669 list of the width of each column
685 list of the width of each column
686 optimal_separator_width
671 best separator width between columns
687 best separator width between columns
672
688
673 Examples
689 Examples
674 --------
690 --------
675 ::
691 ::
676
692
677 In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
693 In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
678 In [2]: list, info = compute_item_matrix(l, displaywidth=12)
694 In [2]: list, info = compute_item_matrix(l, displaywidth=12)
679 In [3]: list
695 In [3]: list
680 Out[3]: [['aaa', 'f', 'k'], ['b', 'g', 'l'], ['cc', 'h', None], ['d', 'i', None], ['eeeee', 'j', None]]
696 Out[3]: [['aaa', 'f', 'k'], ['b', 'g', 'l'], ['cc', 'h', None], ['d', 'i', None], ['eeeee', 'j', None]]
681 In [4]: ideal = {'num_columns': 3, 'column_widths': [5, 1, 1], 'optimal_separator_width': 2, 'max_rows': 5}
697 In [4]: ideal = {'num_columns': 3, 'column_widths': [5, 1, 1], 'optimal_separator_width': 2, 'max_rows': 5}
682 In [5]: all((info[k] == ideal[k] for k in ideal.keys()))
698 In [5]: all((info[k] == ideal[k] for k in ideal.keys()))
683 Out[5]: True
699 Out[5]: True
684 """
700 """
701 warnings.warn(
702 "`compute_item_matrix` is Pending Deprecation since IPython 8.17. "
703 "It is considered for removal in a future version. "
704 "Please open an issue if you believe it should be kept.",
705 stacklevel=2,
706 category=PendingDeprecationWarning,
707 )
685 info = _find_optimal(list(map(len, items)), row_first, *args, **kwargs)
708 info = _find_optimal(list(map(len, items)), row_first, *args, **kwargs)
686 nrow, ncol = info['max_rows'], info['num_columns']
709 nrow, ncol = info['max_rows'], info['num_columns']
687 if row_first:
710 if row_first:
688 return ([[_get_or_default(items, r * ncol + c, default=empty) for c in range(ncol)] for r in range(nrow)], info)
711 return ([[_get_or_default(items, r * ncol + c, default=empty) for c in range(ncol)] for r in range(nrow)], info)
689 else:
712 else:
690 return ([[_get_or_default(items, c * nrow + r, default=empty) for c in range(ncol)] for r in range(nrow)], info)
713 return ([[_get_or_default(items, c * nrow + r, default=empty) for c in range(ncol)] for r in range(nrow)], info)
691
714
692
715
693 def columnize(items, row_first=False, separator=" ", displaywidth=80, spread=False):
716 def columnize(items, row_first=False, separator=" ", displaywidth=80, spread=False):
694 """Transform a list of strings into a single string with columns.
717 """Transform a list of strings into a single string with columns.
695
718
696 Parameters
719 Parameters
697 ----------
720 ----------
698 items : sequence of strings
721 items : sequence of strings
699 The strings to process.
722 The strings to process.
700 row_first : (default False)
723 row_first : (default False)
701 Whether to compute columns for a row-first matrix instead of
724 Whether to compute columns for a row-first matrix instead of
702 column-first (default).
725 column-first (default).
703 separator : str, optional [default is two spaces]
726 separator : str, optional [default is two spaces]
704 The string that separates columns.
727 The string that separates columns.
705 displaywidth : int, optional [default is 80]
728 displaywidth : int, optional [default is 80]
706 Width of the display in number of characters.
729 Width of the display in number of characters.
707
730
708 Returns
731 Returns
709 -------
732 -------
710 The formatted string.
733 The formatted string.
711 """
734 """
735 warnings.warn(
736 "`columnize` is Pending Deprecation since IPython 8.17. "
737 "It is considered for removal in a future version. "
738 "Please open an issue if you believe it should be kept.",
739 stacklevel=2,
740 category=PendingDeprecationWarning,
741 )
712 if not items:
742 if not items:
713 return '\n'
743 return '\n'
714 matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
744 matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
715 if spread:
745 if spread:
716 separator = separator.ljust(int(info['optimal_separator_width']))
746 separator = separator.ljust(int(info['optimal_separator_width']))
717 fmatrix = [filter(None, x) for x in matrix]
747 fmatrix = [filter(None, x) for x in matrix]
718 sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['column_widths'])])
748 sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['column_widths'])])
719 return '\n'.join(map(sjoin, fmatrix))+'\n'
749 return '\n'.join(map(sjoin, fmatrix))+'\n'
720
750
721
751
722 def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
752 def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
723 """
753 """
724 Return a string with a natural enumeration of items
754 Return a string with a natural enumeration of items
725
755
726 >>> get_text_list(['a', 'b', 'c', 'd'])
756 >>> get_text_list(['a', 'b', 'c', 'd'])
727 'a, b, c and d'
757 'a, b, c and d'
728 >>> get_text_list(['a', 'b', 'c'], ' or ')
758 >>> get_text_list(['a', 'b', 'c'], ' or ')
729 'a, b or c'
759 'a, b or c'
730 >>> get_text_list(['a', 'b', 'c'], ', ')
760 >>> get_text_list(['a', 'b', 'c'], ', ')
731 'a, b, c'
761 'a, b, c'
732 >>> get_text_list(['a', 'b'], ' or ')
762 >>> get_text_list(['a', 'b'], ' or ')
733 'a or b'
763 'a or b'
734 >>> get_text_list(['a'])
764 >>> get_text_list(['a'])
735 'a'
765 'a'
736 >>> get_text_list([])
766 >>> get_text_list([])
737 ''
767 ''
738 >>> get_text_list(['a', 'b'], wrap_item_with="`")
768 >>> get_text_list(['a', 'b'], wrap_item_with="`")
739 '`a` and `b`'
769 '`a` and `b`'
740 >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
770 >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
741 'a + b + c = d'
771 'a + b + c = d'
742 """
772 """
743 if len(list_) == 0:
773 if len(list_) == 0:
744 return ''
774 return ''
745 if wrap_item_with:
775 if wrap_item_with:
746 list_ = ['%s%s%s' % (wrap_item_with, item, wrap_item_with) for
776 list_ = ['%s%s%s' % (wrap_item_with, item, wrap_item_with) for
747 item in list_]
777 item in list_]
748 if len(list_) == 1:
778 if len(list_) == 1:
749 return list_[0]
779 return list_[0]
750 return '%s%s%s' % (
780 return '%s%s%s' % (
751 sep.join(i for i in list_[:-1]),
781 sep.join(i for i in list_[:-1]),
752 last_sep, list_[-1])
782 last_sep, list_[-1])
General Comments 0
You need to be logged in to leave comments. Login now