##// END OF EJS Templates
Deprecation and removal for 8.17...
Matthias Bussonnier -
Show More
@@ -1,787 +1,793 b''
1 1 """DEPRECATED: Input handling and transformation machinery.
2 2
3 3 This module was deprecated in IPython 7.0, in favour of inputtransformer2.
4 4
5 5 The first class in this module, :class:`InputSplitter`, is designed to tell when
6 6 input from a line-oriented frontend is complete and should be executed, and when
7 7 the user should be prompted for another line of code instead. The name 'input
8 8 splitter' is largely for historical reasons.
9 9
10 10 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
11 11 with full support for the extended IPython syntax (magics, system calls, etc).
12 12 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
13 13 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
14 14 and stores the results.
15 15
16 16 For more details, see the class docstrings below.
17 17 """
18 18
from warnings import warn

# Emit the module-level deprecation notice at import time.
# stacklevel=2 attributes the warning to the code that imports this module
# rather than to this module itself, so the default warning filters (which
# only display DeprecationWarning triggered from ``__main__``) can actually
# show it to the user who needs to migrate.
warn(
    'IPython.core.inputsplitter is deprecated since IPython 7 in favor of `IPython.core.inputtransformer2`',
    DeprecationWarning,
    stacklevel=2,
)
23 23
24 24 # Copyright (c) IPython Development Team.
25 25 # Distributed under the terms of the Modified BSD License.
26 26 import ast
27 27 import codeop
28 28 import io
29 29 import re
30 30 import sys
31 31 import tokenize
32 32 import warnings
33 33
34 34 from typing import List
35 35
36 36 from IPython.core.inputtransformer import (leading_indent,
37 37 classic_prompt,
38 38 ipy_prompt,
39 39 cellmagic,
40 40 assemble_logical_lines,
41 41 help_end,
42 42 escaped_commands,
43 43 assign_from_magic,
44 44 assign_from_system,
45 45 assemble_python_lines,
46 46 )
47 47 from IPython.utils import tokenutil
48 48
49 49 # These are available in this module for backwards compatibility.
50 50 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
51 51 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
52 52 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
53 53
54 54 #-----------------------------------------------------------------------------
55 55 # Utilities
56 56 #-----------------------------------------------------------------------------
57 57
58 58 # FIXME: These are general-purpose utilities that later can be moved to the
59 59 # general ward. Kept here for now because we're being very strict about test
60 60 # coverage with this code, and this lets us ensure that we keep 100% coverage
61 61 # while developing.
62 62
# Pre-compiled pattern used for autoindent management: it matches an indented
# line holding a statement that normally ends a block of code.
dedent_re = re.compile(
    '|'.join(
        (
            # raise statement (+ space + other stuff, maybe)
            r'^\s+raise(\s.*)?$',
            # wacky raise with immediate open paren
            r'^\s+raise\([^\)]*\).*$',
            # normal return (+ space + other stuff, maybe)
            r'^\s+return(\s.*)?$',
            # wacky return with immediate open paren
            r'^\s+return\([^\)]*\).*$',
            # pass (optionally followed by trailing spaces)
            r'^\s+pass\s*$',
            # break (optionally followed by trailing spaces)
            r'^\s+break\s*$',
            # continue (optionally followed by trailing spaces)
            r'^\s+continue\s*$',
        )
    )
)
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments
comment_line_re = re.compile(r'^\s*\#')


def num_ini_spaces(s):
    """Return the number of initial spaces in a string.

    Note that tabs are counted as a single space.  For now, we do *not* support
    mixing of tabs and spaces in the user's input.

    Calling this function emits a ``PendingDeprecationWarning`` attributed to
    the caller (``stacklevel=2``).

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading run of whitespace characters (space, tab,
        carriage return, form feed, vertical tab); 0 if there is none.
    """
    # The adjacent literals are concatenated, so every sentence must carry its
    # own trailing space to keep the final message readable.
    warnings.warn(
        "`num_ini_spaces` is Pending Deprecation since IPython 8.17. "
        "It is considered for removal in a future version. "
        "Please open an issue if you believe it should be kept.",
        stacklevel=2,
        category=PendingDeprecationWarning,
    )
    ini_spaces = ini_spaces_re.match(s)
    return ini_spaces.end() if ini_spaces else 0
100 106
# Fake token types for partial_tokenize; numbered from tokenize.N_TOKENS
# upwards.
INCOMPLETE_STRING = tokenize.N_TOKENS
IN_MULTILINE_STATEMENT = INCOMPLETE_STRING + 1


# The 2 classes below have the same API as TokenInfo, but don't try to look up
# a token type name that they won't find.
class IncompleteString:
    """Pseudo-token carrying the text of an unterminated string."""

    type = INCOMPLETE_STRING
    exact_type = INCOMPLETE_STRING

    def __init__(self, s, start, end, line):
        self.s, self.line = s, line
        self.start, self.end = start, end


class InMultilineStatement:
    """Pseudo-token marking that the input stops inside an open statement."""

    type = IN_MULTILINE_STATEMENT
    exact_type = IN_MULTILINE_STATEMENT

    def __init__(self, pos, line):
        # Zero-width token: it starts and ends at the same position.
        self.start = pos
        self.end = pos
        self.s = ''
        self.line = line
121 127
def partial_tokens(s):
    """Yield the tokens of a piece of code that may be incomplete.

    On top of the regular tokens, one of two pseudo-tokens may be yielded as
    the very last item: INCOMPLETE_STRING when the input stops inside a
    multi-line string, or IN_MULTILINE_STATEMENT when it stops inside a
    multi-line statement.  Any other ``tokenize.TokenError`` is re-raised.
    """
    # Placeholder so that ``last_seen.end`` is defined even if the tokenizer
    # fails before producing a single token.
    last_seen = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    try:
        line_reader = io.StringIO(s).readline
        for last_seen in tokenutil.generate_tokens_catch_errors(line_reader):
            yield last_seen
    except tokenize.TokenError as err:
        # catch EOF error
        reason = err.args[0]
        source_lines = s.splitlines(keepends=True)
        final_line = source_lines[-1]
        eof_pos = (len(source_lines), len(final_line))
        if 'multi-line string' in reason:
            # Everything after the last complete token belongs to the string.
            start = last_seen.end
            row, col = start
            remainder = source_lines[row - 1][col:] + ''.join(source_lines[row:])
            yield IncompleteString(remainder, start, eof_pos, final_line)
        elif 'multi-line statement' in reason:
            yield InMultilineStatement(eof_pos, final_line)
        else:
            raise
146 152
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation.

    Parameters
    ----------
    code : str
        The (possibly incomplete) source entered so far.

    Returns
    -------
    int
        Suggested indentation, in spaces, for the next line.  0 is returned
        when no significant token is left to inspect or when the code ends
        inside a multiline string.
    """
    tokens = list(partial_tokens(code))
    if tokens and tokens[-1].type == tokenize.ENDMARKER:
        tokens.pop()

    # Drop trailing tokens that say nothing about the block being typed.  The
    # emptiness check keeps input made only of such tokens from raising
    # IndexError.
    while tokens and tokens[-1].type in {
        tokenize.DEDENT,
        tokenize.NEWLINE,
        tokenize.COMMENT,
        tokenize.ERRORTOKEN,
    }:
        tokens.pop()
    if not tokens:
        return 0

    # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
    # of input. We need to remove those if we're in a multiline statement
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        while len(tokens) >= 2 and tokens[-2].type == tokenize.NL:
            tokens.pop(-2)

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before
    prev_indents = [0]

    def _add_indent(n):
        # Record an indentation level only when it differs from the last one.
        if n != prev_indents[-1]:
            prev_indents.append(n)

    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
            _add_indent(tok.end[1])
        elif tok.type == tokenize.NL:
            # The token following a non-logical newline shows where the
            # continuation line starts.
            try:
                _add_indent(next(tokiter).start[1])
            except StopIteration:
                break

    last_indent = prev_indents.pop()

    # If we've just opened a multiline statement (e.g. 'a = ['), indent more
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        if len(tokens) >= 2 and tokens[-2].exact_type in {
            tokenize.LPAR,
            tokenize.LSQB,
            tokenize.LBRACE,
        }:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
219 225
220 226
def last_blank(src):
    """Tell whether the last line of the input source is blank.

    A line counts as blank when it is empty or made only of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise whether the final line, as produced
        by ``str.splitlines``, is blank.
    """
    if not src:
        return False
    final_line = src.splitlines()[-1]
    return not final_line or final_line.isspace()
234 240
235 241
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)


def last_two_blanks(src):
    """Tell whether the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
    """
    if not src:
        return False
    # Rather than one regexp over the whole input, only the last two lines are
    # kept and '###\n' is put in front of them as a stand-in for whatever came
    # before; that reduced string is then checked against two regexps.  Run
    # the whole test suite before touching this logic.
    tail = src.splitlines()[-2:]
    probe = '\n'.join(['###\n', *tail])
    if last_two_blanks_re.match(probe):
        return True
    return last_two_blanks_re2.match(probe) is not None
260 266
261 267
def remove_comments(src):
    """Strip every Python comment from the input source.

    Note: this is purely textual, so a ``#`` inside a string literal is
    treated as the start of a comment too!

    Parameters
    ----------
    src : string
        A single or multiline input string.

    Returns
    -------
    String where each ``#`` and the rest of its line have been removed.
    """
    comment_pattern = re.compile('#.*')
    return comment_pattern.sub('', src)
278 284
279 285
def get_input_encoding():
    """Return the default standard input encoding.

    'ascii' is returned when sys.stdin has no ``encoding`` attribute or when
    that attribute is None."""
    # Some unusual environments leave sys.stdin.encoding set to None; fall
    # back so that callers always get a usable value.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
290 296
291 297 #-----------------------------------------------------------------------------
292 298 # Classes and functions for normal Python syntax handling
293 299 #-----------------------------------------------------------------------------
294 300
class InputSplitter(object):
    r"""Accumulate lines of Python source and tell when they can be executed.

    Source is fed in with :meth:`push`, whose return value says whether the
    code pushed so far is complete.  :meth:`push_accepts_more` answers the
    related question of whether more input can still be added to the current
    interactive block.

    A simple line-oriented terminal client could drive it like this::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + input(prompt)
            isp.push(line)
        print('Input source was:\n', isp.source_reset())
    """
    # Indentation cache: (source the value was computed for, number of spaces).
    # The second item is only valid while the first one equals self.source;
    # otherwise the indentation has to be recomputed.
    _indent_spaces_cache = None, None
    # Default input encoding.  Filled in from get_input_encoding() when an
    # instance is created; a client that knows better may overwrite it.
    encoding = ''
    # Full source pushed so far.  Reading this attribute is the normal way of
    # querying the accumulated code.
    source = ''
    # Code object compiled from the current source, or None when the source
    # does not (yet) compile to valid Python.
    code = None

    # Private attributes

    # Lines of input accumulated so far
    _buffer: List[str]
    # Command compiler
    _compile: codeop.CommandCompiler
    # Whether the current block is complete
    _is_complete = None
    # Whether the current block has an unrecoverable syntax error
    _is_invalid = False

    def __init__(self) -> None:
        """Create a new InputSplitter instance."""
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Clear the input buffer and every piece of derived state."""
        del self._buffer[:]
        self.source = ''
        self.code = None
        self._is_complete = False
        self._is_invalid = False

    def source_reset(self):
        """Return the accumulated source, then perform a full reset."""
        try:
            return self.source
        finally:
            self.reset()

    def check_complete(self, source):
        """Classify a block of code as ready to execute, unfinished or invalid.

        This is a non-stateful API: the state of this InputSplitter is reset
        both before and after the check.

        Parameters
        ----------
        source : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        self.reset()
        try:
            try:
                self.push(source)
            except SyntaxError:
                # Transformers in IPythonInputSplitter can raise SyntaxError,
                # which push() will not catch.
                return 'invalid', None

            if self._is_invalid:
                return 'invalid', None
            if self.push_accepts_more():
                return 'incomplete', self.get_indent_spaces()
            return 'complete', None
        finally:
            self.reset()

    def push(self, lines: str) -> bool:
        """Push one or more lines of input.

        The lines are stored, and the whole accumulated source is compiled to
        find out whether it forms a complete Python block.

        Exceptions raised by the compiler are swallowed; when that happens the
        block is flagged as invalid and the method returns True.

        Parameters
        ----------
        lines : string
            One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
            True if the current input source (the result of the current input
            plus prior inputs) forms a complete Python execution block.  The
            same value is kept in the private ``_is_complete`` attribute, so it
            can be queried at any time.
        """
        assert isinstance(lines, str)
        self._store(lines)
        pending_source = self.source

        # Clear the previous outcome before compiling, so that a failure in
        # the compiler cannot leave a code object that disagrees with source.
        self.code = None
        self._is_complete = None
        self._is_invalid = False

        # Honor termination lines properly
        if pending_source.endswith('\\\n'):
            return False

        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                compiled = self._compile(pending_source, symbol="exec")
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            # Invalid syntax can surface as any of these from inside the
            # compiler.  Such input is reported as a 'ready' block, so that it
            # can be sent on for evaluation with possible IPython
            # special-syntax conversion.
            self._is_complete = True
            self._is_invalid = True
            return True

        # No exception, although the compiler may still have returned None
        # for input that is not finished yet.
        self.code = compiled
        self._is_complete = compiled is not None
        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        Meant for line-oriented frontends, which have to guess whether a block
        is finished from the lines seen so far.  The block is considered
        finished, and no more input is accepted, when either:

        * A SyntaxError is raised

        * The code is complete and consists of a single line or a single
          non-compound statement

        * The code is complete and has a blank line at the end

        For input that produced a syntax error this method returns False
        without raising, since clients typically want to forward invalid
        syntax to an execution backend that may turn it into valid Python via
        one of the dynamic IPython mechanisms.
        """
        # With incomplete input, unconditionally accept more
        # A syntax error also sets _is_complete to True - see push()
        if not self._is_complete:
            return True

        source_lines = self.source.splitlines()

        # The user can make any (complete) input execute by leaving a blank line
        final_line = source_lines[-1]
        if not final_line or final_line.isspace():
            return False

        # Only flush-left input can be executed straight away.
        if self.get_indent_spaces() != 0:
            # General fallback - accept more code
            return True

        # A single line, as after a simple statement such as 'a=1'.
        if len(source_lines) <= 1:
            return False

        try:
            tree = ast.parse("".join(self._buffer))
        except Exception:
            return False

        # A lone AST node without a body is a simple statement: run it now.
        is_single_simple = len(tree.body) == 1 and not hasattr(tree.body[0], 'body')
        return not is_single_simple

    def get_indent_spaces(self):
        """Return the indentation for the next line, using the cache if valid."""
        cached_source, indent = self._indent_spaces_cache
        current_source = self.source
        if cached_source != current_source:
            # self.source always has a trailing newline
            indent = find_next_indent(current_source[:-1])
            self._indent_spaces_cache = (current_source, indent)
        return indent

    # Backwards compatibility. I think all code that used .indent_spaces was
    # inside IPython, but we can leave this here until IPython 7 in case any
    # other modules are using it. -TK, November 2017
    indent_spaces = property(get_indent_spaces)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        A newline is appended when the input does not already end with one.
        The lines go into ``buffer`` (``self._buffer`` by default) and the
        joined buffer is assigned to the attribute named by ``store``."""
        target = self._buffer if buffer is None else buffer
        target.append(lines if lines.endswith('\n') else lines + '\n')
        setattr(self, store, self._set_source(target))

    def _set_source(self, buffer):
        """Join the buffered chunks into a single string."""
        return ''.join(buffer)
546 552
547 553
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax.

    Every pushed line goes through three groups of transformers in order
    (physical line, logical line, Python line) before the result is handed to
    :class:`InputSplitter`.  The raw, untransformed input is kept alongside in
    ``source_raw``.
    """

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Flag to track when assemble_python_lines has stored input that it hasn't
    # given back yet.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=True, physical_line_transforms=None,
                 logical_line_transforms=None, python_line_transforms=None):
        """Create the splitter and set up its transformer pipeline.

        Parameters
        ----------
        line_input_checker : bool
            Passed as ``end_on_blank_line`` to the default ``cellmagic``
            transformer; unused when ``physical_line_transforms`` is given.
        physical_line_transforms : list, optional
            Replaces the default physical line transformers.
        logical_line_transforms : list, optional
            Replaces the default logical line transformers.
        python_line_transforms : list, optional
            Replaces the default (empty) list of Python line transformers.
        """
        super(IPythonInputSplitter, self).__init__()
        self._buffer_raw = []
        # NOTE(review): _validate is not read anywhere in this class --
        # confirm whether anything else still relies on it.
        self._validate = True

        if physical_line_transforms is not None:
            self.physical_line_transforms = physical_line_transforms
        else:
            self.physical_line_transforms = [
                leading_indent(),
                classic_prompt(),
                ipy_prompt(),
                cellmagic(end_on_blank_line=line_input_checker),
            ]

        self.assemble_logical_lines = assemble_logical_lines()
        if logical_line_transforms is not None:
            self.logical_line_transforms = logical_line_transforms
        else:
            self.logical_line_transforms = [
                help_end(),
                escaped_commands(),
                assign_from_magic(),
                assign_from_system(),
            ]

        self.assemble_python_lines = assemble_python_lines()
        if python_line_transforms is not None:
            self.python_line_transforms = python_line_transforms
        else:
            # We don't use any of these at present
            self.python_line_transforms = []

    @property
    def transforms(self):
        "Quick access to all transformers."
        return self.physical_line_transforms + \
            [self.assemble_logical_lines] + self.logical_line_transforms + \
            [self.assemble_python_lines] + self.python_line_transforms

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        # Copy, so that the += below does not modify the stored list.
        t = self.physical_line_transforms[:]
        if not self.within_python_line:
            t += [self.assemble_logical_lines] + self.logical_line_transforms
        return t + [self.assemble_python_lines] + self.python_line_transforms

    def reset(self):
        """Reset the input buffer and associated state."""
        super(IPythonInputSplitter, self).reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False

        # Every transformer is reset too, including those not currently in use.
        for t in self.transforms:
            try:
                t.reset()
            except SyntaxError:
                # Nothing that calls reset() expects to handle transformer
                # errors
                pass

    def flush_transformers(self):
        """Flush the input held by the transformers in use and store it.

        Each transformer in ``transforms_in_use`` is fed, one line at a time,
        the output of the transformer before it, and is then reset.  Whatever
        the last one yields is joined with newlines and stored via
        ``_store``; nothing is stored when there is no output.
        """
        def _flush(transform, outs):
            """yield transformed lines

            always strings, never None

            transform: the current transform
            outs: an iterable of previously transformed inputs.
                 Each may be multiline, which will be passed
                 one line at a time to transform.
            """
            for out in outs:
                for line in out.splitlines():
                    # push one line at a time
                    tmp = transform.push(line)
                    if tmp is not None:
                        yield tmp

            # reset the transform
            tmp = transform.reset()
            if tmp is not None:
                yield tmp

        # Chain the generators: nothing runs until list() below consumes the
        # last one, which pulls from the one before it, and so on.
        out = []
        for t in self.transforms_in_use:
            out = _flush(t, out)

        out = list(out)
        if out:
            self._store('\n'.join(out))

    def raw_reset(self):
        """Return raw input only and perform a full reset.
        """
        out = self.source_raw
        self.reset()
        return out

    def source_reset(self):
        """Flush the transformers, return the source and perform a full reset.

        The reset happens even if flushing raises.
        """
        try:
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push_accepts_more(self):
        """Return whether more input can be pushed into the current block.

        Always True while a transformer is accumulating lines; otherwise the
        decision is left to :meth:`InputSplitter.push_accepts_more`.
        """
        if self.transformer_accumulating:
            return True
        else:
            return super(IPythonInputSplitter, self).push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.

        The splitter is reset before and after; the transformed source is
        returned.
        """
        self.reset()
        try:
            self.push(cell)
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push(self, lines:str) -> bool:
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
            One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
            True if the current input source (the result of the current input
            plus prior inputs) forms a complete Python execution block.  Note that
            this value is also stored as a private attribute (_is_complete), so it
            can be queried at any time.  False is returned when the
            transformers gave nothing back for this input.
        """
        assert isinstance(lines, str)
        # We must ensure all input is pure unicode
        # ''.splitlines() --> [], but we need to push the empty line to transformers
        lines_list = lines.splitlines()
        if not lines_list:
            lines_list = ['']

        # Store raw source before applying any transformations to it.  Note
        # that this must be done *after* the reset() call that would otherwise
        # flush the buffer.
        self._store(lines, self._buffer_raw, 'source_raw')

        # Collect only what the transformers hand back; None means a
        # transformer is still accumulating that line.
        transformed_lines_list = []
        for line in lines_list:
            transformed = self._transform_line(line)
            if transformed is not None:
                transformed_lines_list.append(transformed)

        if transformed_lines_list:
            transformed_lines = '\n'.join(transformed_lines_list)
            return super(IPythonInputSplitter, self).push(transformed_lines)
        else:
            # Got nothing back from transformers - they must be waiting for
            # more input.
            return False

    def _transform_line(self, line):
        """Push a line of input code through the various transformers.

        Returns any output from the transformers, or None if a transformer
        is accumulating lines.

        Sets self.transformer_accumulating as a side effect.
        """
        def _accumulating(dbg):
            # ``dbg`` only identifies the accumulating stage for the
            # commented-out debug print below.
            #print(dbg)
            self.transformer_accumulating = True
            return None

        for transformer in self.physical_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        # The logical line stage is skipped while assemble_python_lines is in
        # the middle of a Python line.
        if not self.within_python_line:
            line = self.assemble_logical_lines.push(line)
            if line is None:
                return _accumulating('acc logical line')

            for transformer in self.logical_line_transforms:
                line = transformer.push(line)
                if line is None:
                    return _accumulating(transformer)

        line = self.assemble_python_lines.push(line)
        if line is None:
            self.within_python_line = True
            return _accumulating('acc python line')
        else:
            self.within_python_line = False

        for transformer in self.python_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        #print("transformers clear") #debug
        self.transformer_accumulating = False
        return line
787 793
@@ -1,752 +1,782 b''
1 1 # encoding: utf-8
2 2 """
3 3 Utilities for working with strings and text.
4 4
5 5 Inheritance diagram:
6 6
7 7 .. inheritance-diagram:: IPython.utils.text
8 8 :parts: 3
9 9 """
10 10
11 11 import os
12 12 import re
13 13 import string
14 14 import sys
15 15 import textwrap
16 import warnings
16 17 from string import Formatter
17 18 from pathlib import Path
18 19
19 20
20 # datetime.strftime date format for ipython
21 if sys.platform == 'win32':
22 date_format = "%B %d, %Y"
23 else:
24 date_format = "%B %-d, %Y"
25
class LSString(str):
    """String subclass exposing several shell-friendly views of its value.

    Instances behave like ordinary strings and additionally provide:

        .l (or .list) : value as a list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value with newlines replaced by spaces.
        .p (or .paths): list of ``pathlib.Path`` objects, one per line that
                        names an existing filesystem path.

    Views that need a transformation are computed on first access and then
    cached on the instance.

    Such strings are handy when talking to the shell, which typically only
    understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the value split on newlines (cached)."""
        try:
            cached = self.__list
        except AttributeError:
            cached = self.__list = self.split('\n')
        return cached

    l = list = property(get_list)

    def get_spstr(self):
        """Return the value with every newline replaced by a space (cached)."""
        try:
            cached = self.__spstr
        except AttributeError:
            cached = self.__spstr = self.replace('\n',' ')
        return cached

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the lines that exist on disk (cached)."""
        try:
            cached = self.__paths
        except AttributeError:
            existing = [Path(p) for p in self.split('\n') if os.path.exists(p)]
            cached = self.__paths = existing
        return cached

    p = paths = property(get_paths)
73 68
74 69 # FIXME: We need to reimplement type specific displayhook and then add this
75 70 # back as a custom printer. This should also be moved outside utils into the
76 71 # core.
77 72
78 73 # def print_lsstring(arg):
79 74 # """ Prettier (non-repr-like) and more informative printer for LSString """
80 75 # print "LSString (.p, .n, .l, .s available). Value:"
81 76 # print arg
82 77 #
83 78 #
84 79 # print_lsstring = result_display.register(LSString)(print_lsstring)
85 80
86 81
class SList(list):
    """List subclass exposing several shell-friendly views of its items.

    Instances behave like ordinary lists and additionally provide:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of ``pathlib.Path`` objects, one per item that
      names an existing filesystem path.

    Views that need a transformation are computed on first access and then
    cached on the instance."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            cached = self.__spstr
        except AttributeError:
            cached = self.__spstr = ' '.join(self)
        return cached

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            cached = self.__nlstr
        except AttributeError:
            cached = self.__nlstr = '\n'.join(self)
        return cached

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return ``Path`` objects for the items that exist on disk (cached)."""
        try:
            cached = self.__paths
        except AttributeError:
            cached = self.__paths = [Path(p) for p in self if os.path.exists(p)]
        return cached

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched case-insensitively; any other pattern
        is called as a predicate. If prune is true, return all items NOT
        matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field; items lacking that field are matched
        against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Select the text the predicate should look at for one item.
            if field is None:
                return s
            try:
                return s.split()[field]
            except IndexError:
                return ""

        if isinstance(pattern, str):
            pred = lambda x : re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern

        # Keep an item when "it matched" disagrees with "we are pruning".
        wanted = not prune
        return SList([el for el in self
                      if bool(pred(match_target(el))) == wanted])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored, and lines for which no requested field
        exists are dropped from the result.

        Without args, fields() just split()'s the strings.
        """
        if not fields:
            return [el.split() for el in self]

        res = SList()
        for parts in (el.split() for el in self):
            picked = []
            for fd in fields:
                try:
                    picked.append(parts[fd])
                except IndexError:
                    pass
            if picked:
                res.append(" ".join(picked))
        return res

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Returns a new SList; the receiver is left untouched.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """

        def sort_key(line):
            # Decorate: whole line, or the selected field wrapped in an SList.
            key = line if field is None else SList([line]).fields(field)
            if not nums:
                return key
            digits = "".join([ch for ch in key if ch.isdigit()])
            try:
                return int(digits)
            except ValueError:
                return 0

        # Sort [key, line] pairs so ties on the key fall back to the line.
        decorated = sorted([sort_key(line), line] for line in self)
        return SList([line for _, line in decorated])
231 226
232 227
233 228 # FIXME: We need to reimplement type specific displayhook and then add this
234 229 # back as a custom printer. This should also be moved outside utils into the
235 230 # core.
236 231
237 232 # def print_slist(arg):
238 233 # """ Prettier (non-repr-like) and more informative printer for SList """
239 234 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
240 235 # if hasattr(arg, 'hideonce') and arg.hideonce:
241 236 # arg.hideonce = False
242 237 # return
243 238 #
244 239 # nlprint(arg) # This was a nested list printer, now removed.
245 240 #
246 241 # print_slist = result_display.register(SList)(print_slist)
247 242
248 243
def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str or None
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces, or None when `instr` is None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # In MULTILINE mode '^' also matches right after a final newline, which
    # leaves a dangling indent at the very end of the text; strip it.
    # Checking '\n' (rather than os.linesep) also covers '\r\n' endings and
    # behaves the same on every platform.  The `ind` guard matters because
    # an empty indent would make the slice below `outstr[:-0]`, i.e. ''.
    if ind and outstr.endswith('\n' + ind):
        return outstr[:-len(ind)]
    return outstr
284 279
285 280
def list_strings(arg):
    """Wrap a lone string in a list; pass anything else through unchanged.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg
308 303
309 304
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    With empty `txt` the result is `mark` repeated and cut to exactly
    `width` characters.  Otherwise `txt` is surrounded by one space and an
    equal run of `mark` on each side.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    # Space left once the text and its two padding blanks are placed,
    # expressed in whole marks, then halved for each side (never negative).
    repeats = max(0, (width - len(txt) - 2) // len(mark) // 2)
    side = mark * repeats
    return f"{side} {txt} {side}"
333 328
334 329
ini_spaces_re = re.compile(r'^(\s+)')


def num_ini_spaces(strng):
    """Return the number of leading whitespace characters in a string.

    Any whitespace matched by the regex ``\\s`` counts, not only spaces.
    Emits a `PendingDeprecationWarning` on every call.

    Parameters
    ----------
    strng : str
        The text to inspect.

    Returns
    -------
    int : length of the leading whitespace run, 0 if there is none.
    """
    warnings.warn(
        "`num_ini_spaces` is Pending Deprecation since IPython 8.17. "
        "It is considered for removal in a future version. "
        "Please open an issue if you believe it should be kept.",
        stacklevel=2,
        category=PendingDeprecationWarning,
    )
    ini_spaces = ini_spaces_re.match(strng)
    if ini_spaces:
        return ini_spaces.end()
    else:
        return 0
345 346
346 347
def format_screen(strng):
    """Format a string for screen printing.

    Strips the latex-style paragraph-continuation marker: a backslash
    sitting at the end of a line (or at the end of the string)."""
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
355 356
356 357
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    first, newline, rest = text.partition('\n')

    # An empty `first` means the text is empty or opens with a blank line;
    # a missing `newline` means there is only one line.  In both cases the
    # first line needs no special treatment.
    if not first or not newline:
        return textwrap.dedent(text)

    # Keep the first line as written and dedent only what follows it.
    return first + '\n' + textwrap.dedent(rest)
382 383
383 384
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    Emits a `PendingDeprecationWarning` on every call.

    Parameters
    ----------
    text : str
        The text to wrap; it is dedented and stripped first.
    ncols : int (default: 80)
        Width passed to `textwrap.fill`.

    Returns
    -------
    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    warnings.warn(
        "`wrap_paragraphs` is Pending Deprecation since IPython 8.17. "
        "It is considered for removal in a future version. "
        "Please open an issue if you believe it should be kept.",
        stacklevel=2,
        category=PendingDeprecationWarning,
    )
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    text = dedent(text).strip()
    paragraphs = paragraph_re.split(text)[::2] # every other entry is space
    out_ps = []
    indent_re = re.compile(r'\n\s+', re.MULTILINE)
    for p in paragraphs:
        # presume indentation that survives dedent is meaningful formatting,
        # so don't fill unless text is flush.
        if indent_re.search(p) is None:
            # wrap paragraph
            p = textwrap.fill(p, ncols)
        out_ps.append(p)
    return out_ps
407 415
408 416
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes the longest prefix made only of '>' and whitespace characters
    that appears *identically* at the start of every line of the input.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    prefix_len = 0

    # Walk the lines column by column; zip stops at the shortest line.
    for column in zip(*lines):
        shared = column[0]
        differs = any(ch != shared for ch in column)
        quoting = shared == ">" or shared in string.whitespace
        if differs or not quoting:
            break
        prefix_len += 1

    return "\n".join(ln[prefix_len:] for ln in lines)
457 465
458 466
def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Only sequences of the form ``ESC [ <digits and semicolons> m`` are
    removed.  Emits a `PendingDeprecationWarning` on every call.

    Parameters
    ----------
    source : str
        Source to remove the ansi from

    Returns
    -------
    str : `source` with the matching escape sequences deleted.
    """
    warnings.warn(
        "`strip_ansi` is Pending Deprecation since IPython 8.17. "
        "It is considered for removal in a future version. "
        "Please open an issue if you believe it should be kept.",
        stacklevel=2,
        category=PendingDeprecationWarning,
    )

    return re.sub(r'\033\[(\d|;)+?m', '', source)
469 485
470 486
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Each replacement field name is evaluated as a Python expression, with
    the keyword arguments of ``format`` serving as its namespace.

    Note that this version interprets a `:` as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """
    def get_field(self, name, args, kwargs):
        """Evaluate `name` against `kwargs`; return ``(value, name)``."""
        return eval(name, kwargs), name
495 511
496 512 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
497 513 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
498 514 # above, it should be possible to remove FullEvalFormatter.
499 515
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as a Python expression, with the
    keyword arguments of ``format`` serving as its namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """
    # Adapted from Formatter._vformat: fields are eval'ed, and what the
    # parser reports as a format spec is glued back on as part of the
    # expression so that slices work.
    def vformat(self, format_string:str, args, kwargs)->str:
        pieces = []
        for literal_text, field_name, format_spec, conversion in self.parse(
            format_string
        ):
            # Literal text preceding the field is copied through as is.
            if literal_text:
                pieces.append(literal_text)

            # No field attached to this chunk: nothing more to do.
            if field_name is None:
                continue

            # The parser split "a[1:2]" at the colon; undo that so the
            # whole thing is evaluated as one expression.
            expression = field_name
            if format_spec:
                expression = ':'.join([field_name, format_spec])

            value = eval(expression, kwargs)
            value = self.convert_field(value, conversion)

            # Always format with an empty spec; the spec was consumed above.
            pieces.append(self.format_field(value, ''))

        return ''.join(pieces)
554 570
555 571
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled dollar sign (``$$foo``) is passed through as the literal text
    ``$foo`` instead of being treated as a field.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """
    _dollar_pattern_ignore_single_quote = re.compile(r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)")
    def parse(self, fmt_string):
        """Yield ``(literal, field, spec, conversion)`` tuples like
        `Formatter.parse`, with extra entries for each ``$name`` found in
        the literal text."""
        for literal_txt, field_name, format_spec, conversion in Formatter.parse(
            self, fmt_string
        ):
            consumed = 0   # how far into literal_txt the matches have reached
            pending = ""   # literal text waiting to be attached to a field

            # Look for $foo patterns inside this literal chunk.
            for m in self._dollar_pattern_ignore_single_quote.finditer(literal_txt):
                before, name = m.group(1,2)
                if name.startswith("$"):
                    # $$foo --> literal $foo
                    pending += before + name
                else:
                    yield (pending + before, name, "", None)
                    pending = ""
                consumed = m.end()

            # Finally hand back the original {foo} style field.
            yield (pending + literal_txt[consumed:], field_name, format_spec, conversion)

    def __repr__(self):
        return "<DollarFormatter>"
598 614
599 615 #-----------------------------------------------------------------------------
600 616 # Utils to columnize a list of string
601 617 #-----------------------------------------------------------------------------
602 618
603 619 def _col_chunks(l, max_rows, row_first=False):
604 620 """Yield successive max_rows-sized column chunks from l."""
605 621 if row_first:
606 622 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)
607 623 for i in range(ncols):
608 624 yield [l[j] for j in range(i, len(l), ncols)]
609 625 else:
610 626 for i in range(0, len(l), max_rows):
611 627 yield l[i:(i + max_rows)]
612 628
613 629
614 630 def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
615 631 """Calculate optimal info to columnize a list of string"""
616 632 for max_rows in range(1, len(rlist) + 1):
617 633 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
618 634 sumlength = sum(col_widths)
619 635 ncols = len(col_widths)
620 636 if sumlength + separator_size * (ncols - 1) <= displaywidth:
621 637 break
622 638 return {'num_columns': ncols,
623 639 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,
624 640 'max_rows': max_rows,
625 641 'column_widths': col_widths
626 642 }
627 643
628 644
629 645 def _get_or_default(mylist, i, default=None):
630 646 """return list item number, or default if don't exist"""
631 647 if i >= len(mylist):
632 648 return default
633 649 else :
634 650 return mylist[i]
635 651
636 652
def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs) :
    """Returns a nested list, and info to columnize items

    Emits a `PendingDeprecationWarning` on every call.

    Parameters
    ----------
    items
        list of strings to columnize
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between columns.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Returns
    -------
    strings_matrix
        nested list of string, the outer most list contains as many list as
        rows, the innermost lists have each as many element as columns. If the
        total number of elements in `items` does not equal the product of
        rows*columns, the missing positions are filled with `empty`.
    dict_info
        some info to make columnize easier:

        num_columns
          number of columns
        max_rows
          maximum number of rows (final number may be less)
        column_widths
          list of width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
        In [2]: list, info = compute_item_matrix(l, displaywidth=12)
        In [3]: list
        Out[3]: [['aaa', 'f', 'k'], ['b', 'g', 'l'], ['cc', 'h', None], ['d', 'i', None], ['eeeee', 'j', None]]
        In [4]: ideal = {'num_columns': 3, 'column_widths': [5, 1, 1], 'optimal_separator_width': 2, 'max_rows': 5}
        In [5]: all((info[k] == ideal[k] for k in ideal.keys()))
        Out[5]: True
    """
    warnings.warn(
        "`compute_item_matrix` is Pending Deprecation since IPython 8.17. "
        "It is considered for removal in a future version. "
        "Please open an issue if you believe it should be kept.",
        stacklevel=2,
        category=PendingDeprecationWarning,
    )
    # Extra positional/keyword arguments are forwarded to _find_optimal
    # (this is how separator_size and displaywidth are accepted).
    info = _find_optimal(list(map(len, items)), row_first, *args, **kwargs)
    nrow, ncol = info['max_rows'], info['num_columns']
    if row_first:
        # Items fill one row after another.
        return ([[_get_or_default(items, r * ncol + c, default=empty) for c in range(ncol)] for r in range(nrow)], info)
    else:
        # Items fill one column after another.
        return ([[_get_or_default(items, c * nrow + r, default=empty) for c in range(ncol)] for r in range(nrow)], info)
691 714
692 715
def columnize(items, row_first=False, separator="  ", displaywidth=80, spread=False):
    """Transform a list of strings into a single string with columns.

    Emits a `PendingDeprecationWarning` on every call.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    separator : str, optional [default is two spaces]
        The string that separates columns.
    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.
    spread : bool, optional [default is False]
        If true, `separator` is padded on the right to the
        'optimal_separator_width' reported for the computed layout.

    Returns
    -------
    The formatted string; just a newline when `items` is empty.
    """
    warnings.warn(
        "`columnize` is Pending Deprecation since IPython 8.17. "
        "It is considered for removal in a future version. "
        "Please open an issue if you believe it should be kept.",
        stacklevel=2,
        category=PendingDeprecationWarning,
    )
    if not items:
        return '\n'
    matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))
    # Drop the filler entries (and any other falsy items) from each row.
    fmatrix = [filter(None, x) for x in matrix]
    # Pad each cell to its column's width, then glue the row together.
    sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['column_widths'])])
    return '\n'.join(map(sjoin, fmatrix))+'\n'
721 751
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with `sep`; the last one is attached
    with `last_sep`.  When `wrap_item_with` is non-empty, each item is
    surrounded by it on both sides.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    count = len(list_)
    if count == 0:
        return ''

    if wrap_item_with:
        list_ = ['%s%s%s' % (wrap_item_with, item, wrap_item_with)
                 for item in list_]

    if count == 1:
        return list_[0]

    leading, final = list_[:-1], list_[-1]
    return '%s%s%s' % (sep.join(leading), last_sep, final)
General Comments 0
You need to be logged in to leave comments. Login now