##// END OF EJS Templates
Merge pull request #11250 from Carreau/invalid-escape-sequence...
Min RK -
r24469:eed56ba3 merge
parent child Browse files
Show More
@@ -1,2060 +1,2060 b''
1 1 """Completion for IPython.
2 2
3 3 This module started as fork of the rlcompleter module in the Python standard
4 4 library. The original enhancements made to rlcompleter have been sent
5 5 upstream and were accepted as of Python 2.3,
6 6
7 7 This module now supports a wide variety of completion mechanisms, both available
8 8 for normal classic Python code, as well as completers for IPython-specific
9 9 syntax like magics.
10 10
11 11 Latex and Unicode completion
12 12 ============================
13 13
14 14 IPython and compatible frontends not only can complete your code, but can help
15 15 you to input a wide range of characters. In particular we allow you to insert
16 16 a unicode character using the tab completion mechanism.
17 17
18 18 Forward latex/unicode completion
19 19 --------------------------------
20 20
21 21 Forward completion allows you to easily type a unicode character using its latex
22 22 name, or unicode long description. To do so type a backslash follow by the
23 23 relevant name and press tab:
24 24
25 25
26 26 Using latex completion:
27 27
28 28 .. code::
29 29
30 30 \\alpha<tab>
31 31 Ξ±
32 32
33 33 or using unicode completion:
34 34
35 35
36 36 .. code::
37 37
38 38 \\greek small letter alpha<tab>
39 39 Ξ±
40 40
41 41
42 42 Only valid Python identifiers will complete. Combining characters (like arrow or
43 43 dots) are also available; unlike latex, they need to be put after their
44 44 counterpart that is to say, `F\\\\vec<tab>` is correct, not `\\\\vec<tab>F`.
45 45
46 46 Some browsers are known to display combining characters incorrectly.
47 47
48 48 Backward latex completion
49 49 -------------------------
50 50
51 51 It is sometime challenging to know how to type a character, if you are using
52 52 IPython, or any compatible frontend you can prepend backslash to the character
53 53 and press `<tab>` to expand it to its latex form.
54 54
55 55 .. code::
56 56
57 57 \\Ξ±<tab>
58 58 \\alpha
59 59
60 60
61 61 Both forward and backward completions can be deactivated by setting the
62 62 ``Completer.backslash_combining_completions`` option to ``False``.
63 63
64 64
65 65 Experimental
66 66 ============
67 67
68 68 Starting with IPython 6.0, this module can make use of the Jedi library to
69 69 generate completions both using static analysis of the code, and dynamically
70 70 inspecting multiple namespaces. The APIs attached to this new mechanism are
71 71 unstable and will raise unless used in a :any:`provisionalcompleter` context
72 72 manager.
73 73
74 74 You will find that the following are experimental:
75 75
76 76 - :any:`provisionalcompleter`
77 77 - :any:`IPCompleter.completions`
78 78 - :any:`Completion`
79 79 - :any:`rectify_completions`
80 80
81 81 .. note::
82 82
83 83 better name for :any:`rectify_completions` ?
84 84
85 85 We welcome any feedback on these new API, and we also encourage you to try this
86 86 module in debug mode (start IPython with ``--Completer.debug=True``) in order
87 87 to have extra logging information if :any:`jedi` is crashing, or if current
88 88 IPython completer pending deprecations are returning results not yet handled
89 89 by :any:`jedi`
90 90
91 91 Using Jedi for tab completion allow snippets like the following to work without
92 92 having to execute any code:
93 93
94 94 >>> myvar = ['hello', 42]
95 95 ... myvar[1].bi<tab>
96 96
97 97 Tab completion will be able to infer that ``myvar[1]`` is a real number without
98 98 executing any code unlike the previously available ``IPCompleter.greedy``
99 99 option.
100 100
101 101 Be sure to update :any:`jedi` to the latest stable version or to try the
102 102 current development version to get better completions.
103 103 """
104 104
105 105
106 106 # Copyright (c) IPython Development Team.
107 107 # Distributed under the terms of the Modified BSD License.
108 108 #
109 109 # Some of this code originated from rlcompleter in the Python standard library
110 110 # Copyright (C) 2001 Python Software Foundation, www.python.org
111 111
112 112
113 113 import __main__
114 114 import builtins as builtin_mod
115 115 import glob
116 116 import time
117 117 import inspect
118 118 import itertools
119 119 import keyword
120 120 import os
121 121 import re
122 122 import sys
123 123 import unicodedata
124 124 import string
125 125 import warnings
126 126
127 127 from contextlib import contextmanager
128 128 from importlib import import_module
129 129 from typing import Iterator, List, Tuple, Iterable, Union
130 130 from types import SimpleNamespace
131 131
132 132 from traitlets.config.configurable import Configurable
133 133 from IPython.core.error import TryNext
134 134 from IPython.core.inputsplitter import ESC_MAGIC
135 135 from IPython.core.latex_symbols import latex_symbols, reverse_latex_symbol
136 136 from IPython.core.oinspect import InspectColors
137 137 from IPython.utils import generics
138 138 from IPython.utils.dir2 import dir2, get_real_method
139 139 from IPython.utils.process import arg_split
140 140 from traitlets import Bool, Enum, observe, Int
141 141
# Tell the doctest machinery to skip this module's docstrings (the examples
# in them are illustrative, not executable).
skip_doctest = True

# Jedi is an optional dependency: record whether it is importable so callers
# can fall back on the legacy completion machinery when it is missing.
try:
    import jedi
    jedi.settings.case_insensitive_completion = False
    import jedi.api.helpers
    import jedi.api.classes
    JEDI_INSTALLED = True
except ImportError:
    JEDI_INSTALLED = False
#-----------------------------------------------------------------------------
# Globals
#-----------------------------------------------------------------------------

# Public API
__all__ = ['Completer','IPCompleter']

# Characters that need escaping when they appear inside a file name.
if sys.platform == 'win32':
    PROTECTABLES = ' '
else:
    PROTECTABLES = ' ()[]{}?=\\|;:\'#*"^&'

# Protect against returning an enormous number of completions which the frontend
# may have trouble processing.
MATCHES_LIMIT = 500

# Sentinel used to detect whether a deprecated readline-related argument was
# explicitly supplied by the caller.
_deprecation_readline_sentinel = object()
171 171
class ProvisionalCompleterWarning(FutureWarning):
    """
    Exception raised by an experimental feature in this module.

    Wrap code in the :any:`provisionalcompleter` context manager if you
    are certain you want to use an unstable feature.
    """
    pass
180 180
# Turn uses of provisional APIs into hard errors unless explicitly allowed
# via the :any:`provisionalcompleter` context manager below.
warnings.filterwarnings('error', category=ProvisionalCompleterWarning)
182 182
@contextmanager
def provisionalcompleter(action='ignore'):
    """
    Context manager that allows access to the unstable completer APIs.

    This context manager has to be used in any place where unstable completer
    behavior and API may be called.

    >>> with provisionalcompleter():
    ...     completer.do_experimental_things() # works

    >>> completer.do_experimental_things() # raises.

    .. note:: Unstable

        By using this context manager you agree that the API in use may change
        without warning, and that you won't complain if they do so.

        You also understand that if the API is not to your liking you should report
        a bug to explain your use case upstream, improving the API, and will lose
        credibility if you complain after the API is made stable.

        We'll be happy to get your feedback, feature requests and improvements on
        any of the unstable APIs!
    """
    with warnings.catch_warnings():
        # Temporarily relax the module-level 'error' filter so that the
        # provisional APIs can be exercised without raising.
        warnings.filterwarnings(action, category=ProvisionalCompleterWarning)
        yield
211 211
212 212
def has_open_quotes(s):
    """Return the quote character left open in *s*, or ``False``.

    An odd number of occurrences of a quote character means that quote type
    is still open.  Double quotes are checked before single quotes so nested
    cases resolve in favour of ``"``.
    """
    for quote in ('"', "'"):
        if s.count(quote) % 2:
            return quote
    return False
232 232
233 233
def protect_filename(s, protectables=PROTECTABLES):
    """Escape a string to protect certain characters."""
    if not (set(s) & set(protectables)):
        # Nothing needs protecting; hand the string back untouched.
        return s
    if sys.platform == "win32":
        # On Windows the conventional escape is to quote the whole path.
        return '"' + s + '"'
    # Elsewhere, backslash-escape each protectable character individually.
    escaped = []
    for ch in s:
        escaped.append("\\" + ch if ch in protectables else ch)
    return "".join(escaped)
243 243
244 244
def expand_user(path:str) -> Tuple[str, bool, str]:
    """Expand ``~``-style usernames in strings.

    This is similar to :func:`os.path.expanduser`, but it computes and returns
    extra information that will be useful if the input was being used in
    computing completions, and you wish to return the completions with the
    original '~' instead of its expanded value.

    Parameters
    ----------
    path : str
        String to be expanded. If no ~ is present, the output is the same as the
        input.

    Returns
    -------
    newpath : str
        Result of ~ expansion in the input path.
    tilde_expand : bool
        Whether any expansion was performed or not.
    tilde_val : str
        The value that ~ was replaced with.
    """
    if not path.startswith('~'):
        # No tilde: nothing was expanded.
        return path, False, ''

    expanded = os.path.expanduser(path)
    tail_len = len(path) - 1
    # Everything except the trailing ``tail_len`` characters of the result is
    # what the leading '~...' expanded to; for a bare '~' it is the whole thing.
    tilde_val = expanded[:-tail_len] if tail_len else expanded
    return expanded, True, tilde_val
283 283
284 284
def compress_user(path:str, tilde_expand:bool, tilde_val:str) -> str:
    """Invert :func:`expand_user`, using its outputs."""
    if not tilde_expand:
        return path
    # Substitute the expanded home prefix back with '~'.
    return path.replace(tilde_val, '~')
292 292
293 293
def completions_sorting_key(word):
    """key for sorting completions

    This does several things:

    - Demote any completions starting with underscores to the end
    - Insert any %magic and %%cellmagic completions in the alphabetical order
      by their name
    """
    # First priority: dunders sort last, single-underscore names next-to-last,
    # and 'name=' keyword-argument completions first of all.
    if word.startswith('__'):
        prio1 = 2
    elif word.startswith('_'):
        prio1 = 1
    else:
        prio1 = 0
    if word.endswith('='):
        prio1 = -1

    # Second priority: strip magic prefixes so magics interleave by bare name;
    # at equal names, cell magics (%%) sort after line magics (%).
    prio2 = 0
    if word.startswith('%%'):
        # Another % further in means this is something else; leave it alone.
        if "%" not in word[2:]:
            word, prio2 = word[2:], 2
    elif word.startswith('%'):
        if "%" not in word[1:]:
            word, prio2 = word[1:], 1

    return prio1, word, prio2
324 324
325 325
326 326 class _FakeJediCompletion:
327 327 """
328 328 This is a workaround to communicate to the UI that Jedi has crashed and to
329 329 report a bug. Will be used only id :any:`IPCompleter.debug` is set to true.
330 330
331 331 Added in IPython 6.0 so should likely be removed for 7.0
332 332
333 333 """
334 334
335 335 def __init__(self, name):
336 336
337 337 self.name = name
338 338 self.complete = name
339 339 self.type = 'crashed'
340 340 self.name_with_symbols = name
341 341 self.signature = ''
342 342 self._origin = 'fake'
343 343
344 344 def __repr__(self):
345 345 return '<Fake completion object jedi has crashed>'
346 346
347 347
class Completion:
    """
    Completion object used and returned by IPython completers.

    .. warning:: Unstable

        This function is unstable, API may change without warning.
        It will also raise unless used in a proper context manager.

    This acts as a middle ground :any:`Completion` object between the
    :any:`jedi.api.classes.Completion` object and the Prompt Toolkit completion
    object. While Jedi needs a lot of information about the evaluator and how
    the code should be run/inspected, PromptToolkit (and other frontends) mostly
    need user facing information.

    - Which range should be replaced by what.
    - Some metadata (like completion type), or meta information to display to
      the user.

    For debugging purpose we can also store the origin of the completion (``jedi``,
    ``IPython.python_matches``, ``IPython.magics_matches``...).
    """

    __slots__ = ['start', 'end', 'text', 'type', 'signature', '_origin']

    def __init__(self, start: int, end: int, text: str, *, type: str=None, _origin='', signature='') -> None:
        # Warn on every construction: under the module-level 'error' filter
        # this raises unless wrapped in :any:`provisionalcompleter`.
        warnings.warn("``Completion`` is a provisional API (as of IPython 6.0). "
                      "It may change without warnings. "
                      "Use in corresponding context manager.",
                      category=ProvisionalCompleterWarning, stacklevel=2)

        self.start = start
        self.end = end
        self.text = text
        self.type = type
        self.signature = signature
        self._origin = _origin

    def __repr__(self):
        return '<Completion start=%s end=%s text=%r type=%r, signature=%r,>' % \
                (self.start, self.end, self.text, self.type or '?', self.signature or '?')

    # BUG FIX: the return annotation used to be the traitlets trait class
    # ``Bool`` (imported at the top of this file), not the builtin ``bool``
    # that a dunder comparison actually returns.
    def __eq__(self, other) -> bool:
        """
        Equality and hash do not hash the type (as some completer may not be
        able to infer the type), but are used to (partially) de-duplicate
        completion.

        Completely de-duplicating completions is a bit trickier than just
        comparing, as it depends on surrounding text, which Completions are not
        aware of.
        """
        return self.start == other.start and \
            self.end == other.end and \
            self.text == other.text

    def __hash__(self):
        return hash((self.start, self.end, self.text))
406 406
407 407
# Type alias: an iterable of Completion objects, used by the de-duplication
# and rectification helpers below.
_IC = Iterable[Completion]
410 410
def _deduplicate_completions(text: str, completions: _IC)-> _IC:
    """
    Deduplicate a set of completions.

    .. warning:: Unstable

        This function is unstable, API may change without warning.

    Parameters
    ----------
    text: str
        text that should be completed.
    completions: Iterator[Completion]
        iterator over the completions to deduplicate

    Yields
    ------
    `Completions` objects


    Completions coming from multiple sources may differ textually yet have the
    same effect once applied to ``text``; such completions are considered equal
    and only the first one encountered is emitted.

    Not folded in `completions()` yet for debugging purpose, and to detect when
    the IPython completer does return things that Jedi does not, but should be
    at some point.
    """
    pool = list(completions)
    if not pool:
        return

    # Normalise every candidate to one common replacement range so the
    # resulting texts can be compared directly.
    lo = min(c.start for c in pool)
    hi = max(c.end for c in pool)

    emitted = set()
    for candidate in pool:
        effective = text[lo:candidate.start] + candidate.text + text[candidate.end:hi]
        if effective in emitted:
            continue
        emitted.add(effective)
        yield candidate
452 452
453 453
def rectify_completions(text: str, completions: _IC, *, _debug=False)->_IC:
    """
    Rectify a set of completions to all have the same ``start`` and ``end``

    .. warning:: Unstable

        This function is unstable, API may change without warning.
        It will also raise unless used in a proper context manager.

    Parameters
    ----------
    text: str
        text that should be completed.
    completions: Iterator[Completion]
        iterator over the completions to rectify


    :any:`jedi.api.classes.Completion` s returned by Jedi may not have the same
    start and end, though the Jupyter Protocol requires them to behave like so.
    Every completion is therefore padded on both sides with the surrounding
    text so that all share one common ``start``/``end`` pair.

    During stabilisation should support a ``_debug`` option to log which
    completions are returned by the IPython completer and not found in Jedi in
    order to make upstream bug reports.
    """
    warnings.warn("`rectify_completions` is a provisional API (as of IPython 6.0). "
                 "It may change without warnings. "
                 "Use in corresponding context manager.",
                  category=ProvisionalCompleterWarning, stacklevel=2)

    pool = list(completions)
    if not pool:
        return

    # Common replacement range covering every candidate.
    new_start = min(c.start for c in pool)
    new_end = max(c.end for c in pool)

    # Track what each origin produced so extras can be reported in debug mode.
    from_jedi = set()
    from_python_matches = set()
    for comp in pool:
        padded = text[new_start:comp.start] + comp.text + text[comp.end:new_end]
        if comp._origin == 'jedi':
            from_jedi.add(padded)
        elif comp._origin == 'IPCompleter.python_matches':
            from_python_matches.add(padded)
        yield Completion(new_start, new_end, padded, type=comp.type,
                         _origin=comp._origin, signature=comp.signature)

    extras = from_python_matches.difference(from_jedi)
    if extras and _debug:
        print('IPython.python matches have extras:', extras)
506 506
507 507
# Delimiters used to split a line into completable 'words'.  On Windows,
# ':', '/' and '\\' are kept out of the set so drive letters and paths
# survive the split.
if sys.platform == 'win32':
    DELIMS = ' \t\n`!@#$^&*()=+[{]}|;\'",<>?'
else:
    DELIMS = ' \t\n`!@#$^&*()=+[{]}\\|;:\'",<>?'

# Extra delimiters applied when greedy completion is active.
GREEDY_DELIMS = ' =\r\n'
515 515
class CompletionSplitter(object):
    """An object to split an input line in a manner similar to readline.

    By having our own implementation, we can expose readline-like completion in
    a uniform manner to all frontends.  This object only needs to be given the
    line of text to be split and the cursor position on said line, and it
    returns the 'word' to be completed on at the cursor after splitting the
    entire line.

    What characters are used as splitting delimiters can be controlled by
    setting the ``delims`` attribute (this is a property that internally
    automatically builds the necessary regular expression)"""

    # Private interface

    # A string of delimiter characters. The default value makes sense for
    # IPython's most typical usage patterns.
    _delims = DELIMS

    # The expression (a normal string) to be compiled into a regular expression
    # for actual splitting.  Kept as an attribute mostly for ease of debugging.
    _delim_expr = None

    # The regular expression that does the actual splitting
    _delim_re = None

    def __init__(self, delims=None):
        self.delims = CompletionSplitter._delims if delims is None else delims

    @property
    def delims(self):
        """Return the string of delimiter characters."""
        return self._delims

    @delims.setter
    def delims(self, delims):
        """Set the delimiters for line splitting."""
        # Build a character class with every delimiter backslash-escaped.
        expr = '[{}]'.format(''.join('\\' + ch for ch in delims))
        self._delim_re = re.compile(expr)
        self._delims = delims
        self._delim_expr = expr

    def split_line(self, line, cursor_pos=None):
        """Split a line of text with a cursor at the given position.
        """
        fragment = line if cursor_pos is None else line[:cursor_pos]
        return self._delim_re.split(fragment)[-1]
565 565
566 566
567 567
class Completer(Configurable):
    """Base completer.

    Matches plain names against the local/global namespaces and, when the
    text contains a dot, attribute names of the evaluated prefix.  The
    IPython-specific matchers live in the ``IPCompleter`` subclass.
    """

    greedy = Bool(False,
        help="""Activate greedy completion
        PENDING DEPRECATION. this is now mostly taken care of with Jedi.

        This will enable completion on elements of lists, results of function calls, etc.,
        but can be unsafe because the code is actually evaluated on TAB.
        """
    ).tag(config=True)

    use_jedi = Bool(default_value=False,
        help="Experimental: Use Jedi to generate autocompletions. "
        "Off by default.").tag(config=True)

    jedi_compute_type_timeout = Int(default_value=400,
        help="""Experimental: restrict time (in milliseconds) during which Jedi can compute types.
        Set to 0 to stop computing types. Non-zero value lower than 100ms may hurt
        performance by preventing jedi to build its cache.
        """).tag(config=True)

    debug = Bool(default_value=False,
                 help='Enable debug for the Completer. Mostly print extra '
                      'information for experimental jedi integration.')\
                      .tag(config=True)

    backslash_combining_completions = Bool(True,
        help="Enable unicode completions, e.g. \\alpha<tab> . "
             "Includes completion of latex commands, unicode names, and expanding "
             "unicode characters back to latex commands.").tag(config=True)



    def __init__(self, namespace=None, global_namespace=None, **kwargs):
        """Create a new completer for the command line.

        Completer(namespace=ns, global_namespace=ns2) -> completer instance.

        If unspecified, the default namespace where completions are performed
        is __main__ (technically, __main__.__dict__). Namespaces should be
        given as dictionaries.

        An optional second namespace can be given. This allows the completer
        to handle cases where both the local and global scopes need to be
        distinguished.
        """

        # Don't bind to namespace quite yet, but flag whether the user wants a
        # specific namespace or to use __main__.__dict__. This will allow us
        # to bind to __main__.__dict__ at completion time, not now.
        if namespace is None:
            self.use_main_ns = True
        else:
            self.use_main_ns = False
            self.namespace = namespace

        # The global namespace, if given, can be bound directly
        if global_namespace is None:
            self.global_namespace = {}
        else:
            self.global_namespace = global_namespace

        super(Completer, self).__init__(**kwargs)

    def complete(self, text, state):
        """Return the next possible completion for 'text'.

        This is called successively with state == 0, 1, 2, ... until it
        returns None. The completion should begin with 'text'.

        """
        if self.use_main_ns:
            # Late binding: pick up whatever __main__ holds *now*.
            self.namespace = __main__.__dict__

        if state == 0:
            # First call: compute and cache the full match list.
            if "." in text:
                self.matches = self.attr_matches(text)
            else:
                self.matches = self.global_matches(text)
        try:
            return self.matches[state]
        except IndexError:
            # Past the end of the cached matches: signal exhaustion.
            return None

    def global_matches(self, text):
        """Compute matches when text is a simple name.

        Return a list of all keywords, built-in functions and names currently
        defined in self.namespace or self.global_namespace that match.

        """
        matches = []
        match_append = matches.append
        n = len(text)
        for lst in [keyword.kwlist,
                    builtin_mod.__dict__.keys(),
                    self.namespace.keys(),
                    self.global_namespace.keys()]:
            for word in lst:
                if word[:n] == text and word != "__builtins__":
                    match_append(word)

        # Also match snake_case names by the initials of their components:
        # e.g. 'fb' completes to 'foo_bar'.
        snake_case_re = re.compile(r"[^_]+(_[^_]+)+?\Z")
        for lst in [self.namespace.keys(),
                    self.global_namespace.keys()]:
            shortened = {"_".join([sub[0] for sub in word.split('_')]) : word
                         for word in lst if snake_case_re.match(word)}
            for word in shortened.keys():
                if word[:n] == text and word != "__builtins__":
                    match_append(shortened[word])
        return matches

    def attr_matches(self, text):
        """Compute matches when text contains a dot.

        Assuming the text is of the form NAME.NAME....[NAME], and is
        evaluatable in self.namespace or self.global_namespace, it will be
        evaluated and its attributes (as revealed by dir()) are used as
        possible completions. (For class instances, class members are
        also considered.)

        WARNING: this can still invoke arbitrary C code, if an object
        with a __getattr__ hook is evaluated.

        """

        # Another option, seems to work great. Catches things like ''.<tab>
        m = re.match(r"(\S+(\.\w+)*)\.(\w*)$", text)

        if m:
            expr, attr = m.group(1, 3)
        elif self.greedy:
            # Greedy mode: re-match against the whole line buffer so that
            # subscripts/calls before the final dot are included.
            # NOTE(review): ``self.line_buffer`` is not set in this class —
            # presumably provided by a subclass; verify before relying on it.
            m2 = re.match(r"(.+)\.(\w*)$", self.line_buffer)
            if not m2:
                return []
            expr, attr = m2.group(1,2)
        else:
            return []

        # Evaluating arbitrary user text is inherently best-effort: any
        # exception (including SystemExit from __getattr__ hooks) simply
        # means "no matches", hence the deliberate bare excepts.
        try:
            obj = eval(expr, self.namespace)
        except:
            try:
                obj = eval(expr, self.global_namespace)
            except:
                return []

        # NOTE(review): ``limit_to__all__`` is not defined on this class —
        # presumably a trait of a subclass; confirm.
        if self.limit_to__all__ and hasattr(obj, '__all__'):
            words = get__all__entries(obj)
        else:
            words = dir2(obj)

        try:
            # Let extensions customise the candidate list.
            words = generics.complete_object(obj, words)
        except TryNext:
            pass
        except AssertionError:
            raise
        except Exception:
            # Silence errors from completion function
            #raise # dbg
            pass
        # Build match list to return
        n = len(attr)
        return [u"%s.%s" % (expr, w) for w in words if w[:n] == attr ]
733 733
734 734
def get__all__entries(obj):
    """Return the strings listed in the ``__all__`` attribute of *obj*.

    Non-string entries are skipped.  Objects without ``__all__`` (or whose
    ``__all__`` access raises) yield an empty list.
    """
    try:
        words = getattr(obj, '__all__')
    except Exception:
        # BUG FIX: was a bare ``except:`` — narrowed so KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return []

    return [w for w in words if isinstance(w, str)]
743 743
744 744
def match_dict_keys(keys: List[str], prefix: str, delims: str):
    """Used by dict_key_matches, matching the prefix to a list of keys

    Parameters
    ==========
    keys:
        list of keys in dictionary currently being completed.
    prefix:
        Part of the text already typed by the user. e.g. `mydict[b'fo`
    delims:
        String of delimiters to consider when finding the current key.

    Returns
    =======

    A tuple of three elements: ``quote``, ``token_start``, ``matched``, with
    ``quote`` being the quote that need to be used to close current string.
    ``token_start`` the position where the replacement should start occurring,
    ``matches`` a list of replacement/completion

    """
    if not prefix:
        return None, 0, [repr(k) for k in keys
                      if isinstance(k, (str, bytes))]
    quote_match = re.search('["\']', prefix)
    if quote_match is None:
        # BUG FIX: a non-empty prefix with no opening quote (e.g. ``mydict[foo``)
        # used to crash with AttributeError on ``None.group()``; there is
        # nothing to string-complete, so report no matches instead.
        return None, 0, []
    quote = quote_match.group()
    try:
        # Close the quote and evaluate to recover the typed key fragment.
        prefix_str = eval(prefix + quote, {})
    except Exception:
        return None, 0, []

    # Locate the token being completed: the longest delimiter-free suffix.
    pattern = '[^' + ''.join('\\' + c for c in delims) + ']*$'
    token_match = re.search(pattern, prefix, re.UNICODE)
    token_start = token_match.start()
    token_prefix = token_match.group()

    matched = []
    for key in keys:
        try:
            if not key.startswith(prefix_str):
                continue
        except (AttributeError, TypeError, UnicodeError):
            # Python 3+ TypeError on b'a'.startswith('a') or vice-versa
            continue

        # reformat remainder of key to begin with prefix
        rem = key[len(prefix_str):]
        # force repr wrapped in '
        rem_repr = repr(rem + '"') if isinstance(rem, str) else repr(rem + b'"')
        if rem_repr.startswith('u') and prefix[0] not in 'uU':
            # Found key is unicode, but prefix is Py2 string.
            # Therefore attempt to interpret key as string.
            try:
                rem_repr = repr(rem.encode('ascii') + '"')
            except UnicodeEncodeError:
                continue

        rem_repr = rem_repr[1 + rem_repr.index("'"):-2]
        if quote == '"':
            # The entered prefix is quoted with ",
            # but the match is quoted with '.
            # A contained " hence needs escaping for comparison:
            rem_repr = rem_repr.replace('"', '\\"')

        # then reinsert prefix from start of token
        matched.append('%s%s' % (token_prefix, rem_repr))
    return quote, token_start, matched
812 812
813 813
def cursor_to_position(text:str, line:int, column:int)->int:
    """
    Convert the (line, column) position of the cursor in ``text`` to a flat
    string offset.

    Parameters
    ----------

    text : str
        The text in which to calculate the cursor offset
    line : int
        Line of the cursor; 0-indexed
    column : int
        Column of the cursor 0-indexed

    Return
    ------
    Position of the cursor in ``text``, 0-indexed.

    See Also
    --------
    position_to_cursor: reciprocal of this function

    """
    lines = text.split('\n')
    assert line <= len(lines), '{} <= {}'.format(str(line), str(len(lines)))

    # Each preceding line contributes its own length plus the '\n' separator.
    offset = column
    for preceding in lines[:line]:
        offset += len(preceding) + 1
    return offset
843 843
def position_to_cursor(text:str, offset:int)->Tuple[int, int]:
    """
    Convert the position of the cursor in text (0 indexed) to a line
    number(0-indexed) and a column number (0-indexed) pair

    Position should be a valid position in ``text``.

    Parameters
    ----------

    text : str
        The text in which to calculate the cursor offset
    offset : int
        Position of the cursor in ``text``, 0-indexed.

    Return
    ------
    (line, column) : (int, int)
        Line of the cursor; 0-indexed, column of the cursor 0-indexed


    See Also
    --------
    cursor_to_position : reciprocal of this function


    """

    assert 0 <= offset <= len(text) , "0 <= %s <= %s" % (offset , len(text))

    head = text[:offset]
    row = head.count('\n')
    # Column is the distance from the character just after the last newline
    # (rfind returns -1 when there is none, which yields column == offset).
    col = offset - (head.rfind('\n') + 1)
    return row, col
879 879
880 880
881 881 def _safe_isinstance(obj, module, class_name):
882 882 """Checks if obj is an instance of module.class_name if loaded
883 883 """
884 884 return (module in sys.modules and
885 885 isinstance(obj, getattr(import_module(module), class_name)))
886 886
887 887
def back_unicode_name_matches(text):
    u"""Match unicode characters back to unicode name

    This does ``β˜ƒ`` -> ``\\snowman``

    Note that snowman is not a valid python3 combining character but will be expanded.
    Though it will not recombine back to the snowman character by the completion machinery.

    This will not either back-complete standard sequences like \\n, \\b ...

    Returns
    -------
    A ``(text_to_replace, matches)`` pair; ``('', ())`` when there is nothing
    to complete.
    """
    if len(text)<2:
        return u'', ()
    maybe_slash = text[-2]
    if maybe_slash != '\\':
        return u'', ()

    char = text[-1]
    # no expand on quote for completion in strings.
    # nor backcomplete standard ascii keys
    if char in string.ascii_letters or char in ['"',"'"]:
        return u'', ()
    try :
        unic = unicodedata.name(char)
        return '\\'+char,['\\'+unic]
    except (KeyError, ValueError):
        # BUG FIX: ``unicodedata.name`` raises ValueError (not KeyError) for
        # characters without a name; previously this crashed the completer.
        pass
    return u'', ()
917 917
def back_latex_name_matches(text:str):
    """Match latex characters back to unicode name

    This does ``\\β„΅`` -> ``\\aleph``

    Used on Python 3 only.
    """
    if len(text) < 2 or text[-2] != '\\':
        return u'', ()

    char = text[-1]
    # Never expand quotes (we may be completing inside a string literal)
    # nor plain ASCII letters (those would shadow escapes such as \n).
    if char in string.ascii_letters or char in ('"', "'"):
        return u'', ()

    latex = reverse_latex_symbol.get(char)
    if latex is None:
        return u'', ()
    # Return '\\' + char as the matched text so the backslash is replaced
    # along with the character.
    return u'\\' + char, [latex]
944 944
945 945
946 946 def _formatparamchildren(parameter) -> str:
947 947 """
948 948 Get parameter name and value from Jedi Private API
949 949
950 950 Jedi does not expose a simple way to get `param=value` from its API.
951 951
952 952 Parameter
953 953 =========
954 954
955 955 parameter:
956 956 Jedi's function `Param`
957 957
958 958 Returns
959 959 =======
960 960
961 961 A string like 'a', 'b=1', '*args', '**kwargs'
962 962
963 963
964 964 """
965 965 description = parameter.description
966 966 if not description.startswith('param '):
967 967 raise ValueError('Jedi function parameter description have change format.'
968 968 'Expected "param ...", found %r".' % description)
969 969 return description[6:]
970 970
def _make_signature(completion)-> str:
    """
    Make the signature from a jedi completion

    Parameter
    =========

    completion: jedi.Completion
        object does not complete a function type

    Returns
    =======

    a string consisting of the function signature, with the parenthesis but
    without the function name. example:
    `(a, *args, b=1, **kwargs)`

    """
    parts = []
    for param in completion.params:
        formatted = _formatparamchildren(param)
        # Skip parameters that format to an empty string.
        if formatted:
            parts.append(formatted)
    return '(%s)' % ', '.join(parts)
991 991
992 992 class IPCompleter(Completer):
993 993 """Extension of the completer class with IPython-specific features"""
994 994
995 995 @observe('greedy')
996 996 def _greedy_changed(self, change):
997 997 """update the splitter and readline delims when greedy is changed"""
998 998 if change['new']:
999 999 self.splitter.delims = GREEDY_DELIMS
1000 1000 else:
1001 1001 self.splitter.delims = DELIMS
1002 1002
    # When several matchers return results, either concatenate them all
    # (True) or stop at the first matcher that produced anything (False).
    merge_completions = Bool(True,
        help="""Whether to merge completion results into a single list

        If False, only the completion results from the first non-empty
        completer will be returned.
        """
    ).tag(config=True)
    # Filtering of underscore-prefixed attributes on ``obj.<tab>``:
    # 2 hides everything starting with '_', 1 hides only dunders, 0 hides nothing.
    omit__names = Enum((0,1,2), default_value=2,
        help="""Instruct the completer to omit private method names

        Specifically, when completing on ``object.<tab>``.

        When 2 [default]: all names that start with '_' will be excluded.

        When 1: all 'magic' names (``__foo__``) will be excluded.

        When 0: nothing will be excluded.
        """
    ).tag(config=True)
    # Deprecated since 5.0; changing it triggers a deprecation warning
    # through its observer.
    limit_to__all__ = Bool(False,
        help="""
        DEPRECATED as of version 5.0.

        Instruct the completer to use __all__ for the completion

        Specifically, when completing on ``object.<tab>``.

        When True: only those names in obj.__all__ will be included.

        When False [default]: the __all__ attribute is ignored
        """,
    ).tag(config=True)
1035 1035
1036 1036 @observe('limit_to__all__')
1037 1037 def _limit_to_all_changed(self, change):
1038 1038 warnings.warn('`IPython.core.IPCompleter.limit_to__all__` configuration '
1039 1039 'value has been deprecated since IPython 5.0, will be made to have '
1040 1040 'no effects and then removed in future version of IPython.',
1041 1041 UserWarning)
1042 1042
    def __init__(self, shell=None, namespace=None, global_namespace=None,
                 use_readline=_deprecation_readline_sentinel, config=None, **kwargs):
        """IPCompleter() -> completer

        Return a completer object.

        Parameters
        ----------

        shell
            a pointer to the ipython shell itself. This is needed
            because this completer knows about magic functions, and those can
            only be accessed via the ipython instance.

        namespace : dict, optional
            an optional dict where completions are performed.

        global_namespace : dict, optional
            secondary optional dict for completions, to
            handle cases (such as IPython embedded inside functions) where
            both Python scopes are visible.

        use_readline : bool, optional
            DEPRECATED, ignored since IPython 6.0, will have no effects
        """

        self.magic_escape = ESC_MAGIC
        self.splitter = CompletionSplitter()

        if use_readline is not _deprecation_readline_sentinel:
            warnings.warn('The `use_readline` parameter is deprecated and ignored since IPython 6.0.',
                          DeprecationWarning, stacklevel=2)

        # _greedy_changed() depends on splitter and readline being defined:
        Completer.__init__(self, namespace=namespace, global_namespace=global_namespace,
                           config=config, **kwargs)

        # List where completion matches will be stored
        self.matches = []
        self.shell = shell
        # Regexp to split filenames with spaces in them
        self.space_name_re = re.compile(r'([^\\] )')
        # Hold a local ref. to glob.glob for speed
        self.glob = glob.glob

        # Determine if we are running on 'dumb' terminals, like (X)Emacs
        # buffers, to avoid completion problems.
        term = os.environ.get('TERM','xterm')
        self.dumb_terminal = term in ['dumb','emacs']

        # Special handling of backslashes needed in win32 platforms
        if sys.platform == "win32":
            self.clean_glob = self._clean_glob_win32
        else:
            self.clean_glob = self._clean_glob

        #regexp to parse docstring for function signature
        self.docstring_sig_re = re.compile(r'^[\w|\s.]+\(([^)]*)\).*')
        self.docstring_kwd_re = re.compile(r'[\s|\[]*(\w+)(?:\s*=\s*.*)')
        #use this if positional argument name is also needed
        #= re.compile(r'[\s|\[]*(\w+)(?:\s*=?\s*.*)')

        # Matchers that only apply to arguments of magic commands
        # (%config and %colors).
        self.magic_arg_matchers = [
            self.magic_config_matches,
            self.magic_color_matches,
        ]

        # This is set externally by InteractiveShell
        self.custom_completers = None
1112 1112
1113 1113 @property
1114 1114 def matchers(self):
1115 1115 """All active matcher routines for completion"""
1116 1116 if self.use_jedi:
1117 1117 return [
1118 1118 self.file_matches,
1119 1119 self.magic_matches,
1120 1120 self.dict_key_matches,
1121 1121 ]
1122 1122 else:
1123 1123 return [
1124 1124 self.python_matches,
1125 1125 self.file_matches,
1126 1126 self.magic_matches,
1127 1127 self.python_func_kw_matches,
1128 1128 self.dict_key_matches,
1129 1129 ]
1130 1130
1131 1131 def all_completions(self, text):
1132 1132 """
1133 1133 Wrapper around the complete method for the benefit of emacs.
1134 1134 """
1135 1135 return self.complete(text)[1]
1136 1136
1137 1137 def _clean_glob(self, text):
1138 1138 return self.glob("%s*" % text)
1139 1139
1140 1140 def _clean_glob_win32(self,text):
1141 1141 return [f.replace("\\","/")
1142 1142 for f in self.glob("%s*" % text)]
1143 1143
    def file_matches(self, text):
        """Match filenames, expanding ~USER type strings.

        Most of the seemingly convoluted logic in this completer is an
        attempt to handle filenames with spaces in them. And yet it's not
        quite perfect, because Python's readline doesn't expose all of the
        GNU readline details needed for this to be done correctly.

        For a filename with a space in it, the printed completions will be
        only the parts after what's already been typed (instead of the
        full completions, as is normally done). I don't think with the
        current (as of Python 2.3) Python readline it's possible to do
        better.

        Returns a list of candidate path strings, with directories marked
        by a trailing '/'."""

        # chars that require escaping with backslash - i.e. chars
        # that readline treats incorrectly as delimiters, but we
        # don't want to treat as delimiters in filename matching
        # when escaped with backslash
        if text.startswith('!'):
            text = text[1:]
            text_prefix = u'!'
        else:
            text_prefix = u''

        text_until_cursor = self.text_until_cursor
        # track strings with open quotes
        open_quotes = has_open_quotes(text_until_cursor)

        if '(' in text_until_cursor or '[' in text_until_cursor:
            # inside a call or subscript, complete only the current token
            lsplit = text
        else:
            try:
                # arg_split ~ shlex.split, but with unicode bugs fixed by us
                lsplit = arg_split(text_until_cursor)[-1]
            except ValueError:
                # typically an unmatched ", or backslash without escaped char.
                if open_quotes:
                    lsplit = text_until_cursor.split(open_quotes)[-1]
                else:
                    return []
            except IndexError:
                # tab pressed on empty line
                lsplit = ""

        if not open_quotes and lsplit != protect_filename(lsplit):
            # if protectables are found, do matching on the whole escaped name
            has_protectables = True
            text0,text = text,lsplit
        else:
            has_protectables = False
            text = os.path.expanduser(text)

        if text == "":
            # nothing typed yet: offer everything in the current directory
            return [text_prefix + protect_filename(f) for f in self.glob("*")]

        # Compute the matches from the filesystem
        if sys.platform == 'win32':
            m0 = self.clean_glob(text)
        else:
            # on POSIX, drop escaping backslashes before hitting the filesystem
            m0 = self.clean_glob(text.replace('\\', ''))

        if has_protectables:
            # If we had protectables, we need to revert our changes to the
            # beginning of filename so that we don't double-write the part
            # of the filename we have so far
            len_lsplit = len(lsplit)
            matches = [text_prefix + text0 +
                       protect_filename(f[len_lsplit:]) for f in m0]
        else:
            if open_quotes:
                # if we have a string with an open quote, we don't need to
                # protect the names beyond the quote (and we _shouldn't_, as
                # it would cause bugs when the filesystem call is made).
                matches = m0 if sys.platform == "win32" else\
                    [protect_filename(f, open_quotes) for f in m0]
            else:
                matches = [text_prefix +
                           protect_filename(f) for f in m0]

        # Mark directories in input list by appending '/' to their names.
        return [x+'/' if os.path.isdir(x) else x for x in matches]
1225 1225
    def magic_matches(self, text):
        """Match magics.

        Returns a list of matching magic names, prefixed with the magic
        escape character(s) (``%`` for line magics, ``%%`` for cell magics).
        """
        # Get all shell magics now rather than statically, so magics loaded at
        # runtime show up too.
        lsm = self.shell.magics_manager.lsmagic()
        line_magics = lsm['line']
        cell_magics = lsm['cell']
        pre = self.magic_escape
        pre2 = pre+pre

        explicit_magic = text.startswith(pre)

        # Completion logic:
        # - user gives %%: only do cell magics
        # - user gives %: do both line and cell magics
        # - no prefix: do both
        # In other words, line magics are skipped if the user gives %% explicitly
        #
        # We also exclude magics that match any currently visible names:
        # https://github.com/ipython/ipython/issues/4877, unless the user has
        # typed a %:
        # https://github.com/ipython/ipython/issues/10754
        # NOTE: str.lstrip(pre) strips *all* leading escape characters, so
        # both '%foo' and '%%foo' reduce to 'foo' here.
        bare_text = text.lstrip(pre)
        global_matches = self.global_matches(bare_text)
        if not explicit_magic:
            def matches(magic):
                """
                Filter magics, in particular remove magics that match
                a name present in global namespace.
                """
                return ( magic.startswith(bare_text) and
                         magic not in global_matches )
        else:
            def matches(magic):
                return magic.startswith(bare_text)

        comp = [ pre2+m for m in cell_magics if matches(m)]
        if not text.startswith(pre2):
            comp += [ pre+m for m in line_magics if matches(m)]

        return comp
1267 1267
1268 1268 def magic_config_matches(self, text:str) -> List[str]:
1269 1269 """ Match class names and attributes for %config magic """
1270 1270 texts = text.strip().split()
1271 1271
1272 1272 if len(texts) > 0 and (texts[0] == 'config' or texts[0] == '%config'):
1273 1273 # get all configuration classes
1274 1274 classes = sorted(set([ c for c in self.shell.configurables
1275 1275 if c.__class__.class_traits(config=True)
1276 1276 ]), key=lambda x: x.__class__.__name__)
1277 1277 classnames = [ c.__class__.__name__ for c in classes ]
1278 1278
1279 1279 # return all classnames if config or %config is given
1280 1280 if len(texts) == 1:
1281 1281 return classnames
1282 1282
1283 1283 # match classname
1284 1284 classname_texts = texts[1].split('.')
1285 1285 classname = classname_texts[0]
1286 1286 classname_matches = [ c for c in classnames
1287 1287 if c.startswith(classname) ]
1288 1288
1289 1289 # return matched classes or the matched class with attributes
1290 1290 if texts[1].find('.') < 0:
1291 1291 return classname_matches
1292 1292 elif len(classname_matches) == 1 and \
1293 1293 classname_matches[0] == classname:
1294 1294 cls = classes[classnames.index(classname)].__class__
1295 1295 help = cls.class_get_help()
1296 1296 # strip leading '--' from cl-args:
1297 1297 help = re.sub(re.compile(r'^--', re.MULTILINE), '', help)
1298 1298 return [ attr.split('=')[0]
1299 1299 for attr in help.strip().splitlines()
1300 1300 if attr.startswith(texts[1]) ]
1301 1301 return []
1302 1302
1303 1303 def magic_color_matches(self, text:str) -> List[str] :
1304 1304 """ Match color schemes for %colors magic"""
1305 1305 texts = text.split()
1306 1306 if text.endswith(' '):
1307 1307 # .split() strips off the trailing whitespace. Add '' back
1308 1308 # so that: '%colors ' -> ['%colors', '']
1309 1309 texts.append('')
1310 1310
1311 1311 if len(texts) == 2 and (texts[0] == 'colors' or texts[0] == '%colors'):
1312 1312 prefix = texts[1]
1313 1313 return [ color for color in InspectColors.keys()
1314 1314 if color.startswith(prefix) ]
1315 1315 return []
1316 1316
    def _jedi_matches(self, cursor_column:int, cursor_line:int, text:str):
        """

        Return a list of :any:`jedi.api.Completions` object from a ``text`` and
        cursor position.

        Parameters
        ----------
        cursor_column : int
            column position of the cursor in ``text``, 0-indexed.
        cursor_line : int
            line position of the cursor in ``text``, 0-indexed
        text : str
            text to complete

        Debugging
        ---------

        If ``IPCompleter.debug`` is ``True`` may return a :any:`_FakeJediCompletion`
        object containing a string with the Jedi debug information attached.
        """
        namespaces = [self.namespace]
        if self.global_namespace is not None:
            namespaces.append(self.global_namespace)

        completion_filter = lambda x:x
        offset = cursor_to_position(text, cursor_line, cursor_column)
        # filter output if we are completing for object members
        if offset:
            pre = text[offset-1]
            if pre == '.':
                if self.omit__names == 2:
                    # hide every name starting with '_'
                    completion_filter = lambda c:not c.name.startswith('_')
                elif self.omit__names == 1:
                    # hide only dunder names (``__foo__``)
                    completion_filter = lambda c:not (c.name.startswith('__') and c.name.endswith('__'))
                elif self.omit__names == 0:
                    completion_filter = lambda x:x
                else:
                    raise ValueError("Don't understand self.omit__names == {}".format(self.omit__names))

        # jedi's ``line`` argument is 1-based, hence the + 1
        interpreter = jedi.Interpreter(
            text[:offset], namespaces, column=cursor_column, line=cursor_line + 1)
        try_jedi = True

        try:
            # should we check the type of the node is Error ?
            try:
                # jedi < 0.11
                from jedi.parser.tree import ErrorLeaf
            except ImportError:
                # jedi >= 0.11
                from parso.tree import ErrorLeaf

            # NOTE(review): this peeks at jedi/parso private parse-tree API to
            # detect an unterminated string literal at the cursor.
            next_to_last_tree = interpreter._get_module().tree_node.children[-2]
            completing_string = False
            if isinstance(next_to_last_tree, ErrorLeaf):
                completing_string = next_to_last_tree.value.lstrip()[0] in {'"', "'"}
            # if we are in a string jedi is likely not the right candidate for
            # now. Skip it.
            try_jedi = not completing_string
        except Exception as e:
            # many of things can go wrong, we are using private API just don't crash.
            if self.debug:
                print("Error detecting if completing a non-finished string :", e, '|')

        if not try_jedi:
            return []
        try:
            return filter(completion_filter, interpreter.completions())
        except Exception as e:
            if self.debug:
                return [_FakeJediCompletion('Oops Jedi has crashed, please report a bug with the following:\n"""\n%s\ns"""' % (e))]
            else:
                return []
1391 1391
1392 1392 def python_matches(self, text):
1393 1393 """Match attributes or global python names"""
1394 1394 if "." in text:
1395 1395 try:
1396 1396 matches = self.attr_matches(text)
1397 1397 if text.endswith('.') and self.omit__names:
1398 1398 if self.omit__names == 1:
1399 1399 # true if txt is _not_ a __ name, false otherwise:
1400 1400 no__name = (lambda txt:
1401 1401 re.match(r'.*\.__.*?__',txt) is None)
1402 1402 else:
1403 1403 # true if txt is _not_ a _ name, false otherwise:
1404 1404 no__name = (lambda txt:
1405 1405 re.match(r'\._.*?',txt[txt.rindex('.'):]) is None)
1406 1406 matches = filter(no__name, matches)
1407 1407 except NameError:
1408 1408 # catches <undefined attributes>.<tab>
1409 1409 matches = []
1410 1410 else:
1411 1411 matches = self.global_matches(text)
1412 1412 return matches
1413 1413
1414 1414 def _default_arguments_from_docstring(self, doc):
1415 1415 """Parse the first line of docstring for call signature.
1416 1416
1417 1417 Docstring should be of the form 'min(iterable[, key=func])\n'.
1418 1418 It can also parse cython docstring of the form
1419 1419 'Minuit.migrad(self, int ncall=10000, resume=True, int nsplit=1)'.
1420 1420 """
1421 1421 if doc is None:
1422 1422 return []
1423 1423
1424 1424 #care only the firstline
1425 1425 line = doc.lstrip().splitlines()[0]
1426 1426
1427 1427 #p = re.compile(r'^[\w|\s.]+\(([^)]*)\).*')
1428 1428 #'min(iterable[, key=func])\n' -> 'iterable[, key=func]'
1429 1429 sig = self.docstring_sig_re.search(line)
1430 1430 if sig is None:
1431 1431 return []
1432 1432 # iterable[, key=func]' -> ['iterable[' ,' key=func]']
1433 1433 sig = sig.groups()[0].split(',')
1434 1434 ret = []
1435 1435 for s in sig:
1436 1436 #re.compile(r'[\s|\[]*(\w+)(?:\s*=\s*.*)')
1437 1437 ret += self.docstring_kwd_re.findall(s)
1438 1438 return ret
1439 1439
    def _default_arguments(self, obj):
        """Return the list of default arguments of obj if it is callable,
        or empty list otherwise.

        Argument names are collected both from docstring parsing (useful
        for builtins/cython where introspection fails) and from
        :func:`inspect.signature`, then deduplicated.
        """
        call_obj = obj
        ret = []
        if inspect.isbuiltin(obj):
            # builtins: skip the class/__call__ resolution below and fall
            # through to the docstring + signature attempts on obj itself
            pass
        elif not (inspect.isfunction(obj) or inspect.ismethod(obj)):
            if inspect.isclass(obj):
                #for cython embedsignature=True the constructor docstring
                #belongs to the object itself not __init__
                ret += self._default_arguments_from_docstring(
                         getattr(obj, '__doc__', ''))
                # for classes, check for __init__,__new__
                call_obj = (getattr(obj, '__init__', None) or
                       getattr(obj, '__new__', None))
            # for all others, check if they are __call__able
            elif hasattr(obj, '__call__'):
                call_obj = obj.__call__
        ret += self._default_arguments_from_docstring(
                 getattr(call_obj, '__doc__', ''))

        # only keyword-capable parameter kinds are completion candidates
        _keeps = (inspect.Parameter.KEYWORD_ONLY,
                  inspect.Parameter.POSITIONAL_OR_KEYWORD)

        try:
            sig = inspect.signature(call_obj)
            ret.extend(k for k, v in sig.parameters.items() if
                       v.kind in _keeps)
        except ValueError:
            # inspect.signature raises ValueError for objects with no
            # retrievable signature (many C builtins)
            pass

        return list(set(ret))
1473 1473
    def python_func_kw_matches(self,text):
        """Match named parameters (kwargs) of the last open function.

        Returns completions of the form ``'name='`` for the keyword
        arguments of the callable whose parenthesis is still open at the
        cursor, excluding keywords that have already been used.
        """

        if "." in text: # a parameter cannot be dotted
            return []
        try: regexp = self.__funcParamsRegex
        except AttributeError:
            # lazily build (and cache) the tokenizing regexp
            regexp = self.__funcParamsRegex = re.compile(r'''
                '.*?(?<!\\)' |    # single quoted strings or
                ".*?(?<!\\)" |    # double quoted strings or
                \w+          |    # identifier
                \S                # other characters
                ''', re.VERBOSE | re.DOTALL)
        # 1. find the nearest identifier that comes before an unclosed
        # parenthesis before the cursor
        # e.g. for "foo (1+bar(x), pa<cursor>,a=1)", the candidate is "foo"
        tokens = regexp.findall(self.text_until_cursor)
        iterTokens = reversed(tokens); openPar = 0

        for token in iterTokens:
            if token == ')':
                openPar -= 1
            elif token == '(':
                openPar += 1
                if openPar > 0:
                    # found the last unclosed parenthesis
                    break
        else:
            # for-else: no unclosed parenthesis was found at all
            return []
        # 2. Concatenate dotted names ("foo.bar" for "foo.bar(x, pa" )
        ids = []
        isId = re.compile(r'\w+$').match

        # walk backwards collecting 'name', '.', 'name', ... tokens
        while True:
            try:
                ids.append(next(iterTokens))
                if not isId(ids[-1]):
                    ids.pop(); break
                if not next(iterTokens) == '.':
                    break
            except StopIteration:
                break

        # Find all named arguments already assigned to, as to avoid suggesting
        # them again
        usedNamedArgs = set()
        par_level = -1
        for token, next_token in zip(tokens, tokens[1:]):
            if token == '(':
                par_level += 1
            elif token == ')':
                par_level -= 1

            if par_level != 0:
                continue

            if next_token != '=':
                continue

            usedNamedArgs.add(token)

        # lookup the candidate callable matches either using global_matches
        # or attr_matches for dotted names
        if len(ids) == 1:
            callableMatches = self.global_matches(ids[0])
        else:
            callableMatches = self.attr_matches('.'.join(ids[::-1]))
        argMatches = []
        for callableMatch in callableMatches:
            try:
                # NOTE: eval of a completion candidate in the user namespace;
                # this is how the interactive completer resolves objects.
                namedArgs = self._default_arguments(eval(callableMatch,
                                                         self.namespace))
            except:
                continue

            # Remove used named arguments from the list, no need to show twice
            for namedArg in set(namedArgs) - usedNamedArgs:
                if namedArg.startswith(text):
                    argMatches.append(u"%s=" %namedArg)
        return argMatches
1554 1554
1555 1555 def dict_key_matches(self, text):
1556 1556 "Match string keys in a dictionary, after e.g. 'foo[' "
1557 1557 def get_keys(obj):
1558 1558 # Objects can define their own completions by defining an
1559 1559 # _ipy_key_completions_() method.
1560 1560 method = get_real_method(obj, '_ipython_key_completions_')
1561 1561 if method is not None:
1562 1562 return method()
1563 1563
1564 1564 # Special case some common in-memory dict-like types
1565 1565 if isinstance(obj, dict) or\
1566 1566 _safe_isinstance(obj, 'pandas', 'DataFrame'):
1567 1567 try:
1568 1568 return list(obj.keys())
1569 1569 except Exception:
1570 1570 return []
1571 1571 elif _safe_isinstance(obj, 'numpy', 'ndarray') or\
1572 1572 _safe_isinstance(obj, 'numpy', 'void'):
1573 1573 return obj.dtype.names or []
1574 1574 return []
1575 1575
1576 1576 try:
1577 1577 regexps = self.__dict_key_regexps
1578 1578 except AttributeError:
1579 1579 dict_key_re_fmt = r'''(?x)
1580 1580 ( # match dict-referring expression wrt greedy setting
1581 1581 %s
1582 1582 )
1583 1583 \[ # open bracket
1584 1584 \s* # and optional whitespace
1585 1585 ([uUbB]? # string prefix (r not handled)
1586 1586 (?: # unclosed string
1587 1587 '(?:[^']|(?<!\\)\\')*
1588 1588 |
1589 1589 "(?:[^"]|(?<!\\)\\")*
1590 1590 )
1591 1591 )?
1592 1592 $
1593 1593 '''
1594 1594 regexps = self.__dict_key_regexps = {
1595 False: re.compile(dict_key_re_fmt % '''
1595 False: re.compile(dict_key_re_fmt % r'''
1596 1596 # identifiers separated by .
1597 1597 (?!\d)\w+
1598 1598 (?:\.(?!\d)\w+)*
1599 1599 '''),
1600 1600 True: re.compile(dict_key_re_fmt % '''
1601 1601 .+
1602 1602 ''')
1603 1603 }
1604 1604
1605 1605 match = regexps[self.greedy].search(self.text_until_cursor)
1606 1606 if match is None:
1607 1607 return []
1608 1608
1609 1609 expr, prefix = match.groups()
1610 1610 try:
1611 1611 obj = eval(expr, self.namespace)
1612 1612 except Exception:
1613 1613 try:
1614 1614 obj = eval(expr, self.global_namespace)
1615 1615 except Exception:
1616 1616 return []
1617 1617
1618 1618 keys = get_keys(obj)
1619 1619 if not keys:
1620 1620 return keys
1621 1621 closing_quote, token_offset, matches = match_dict_keys(keys, prefix, self.splitter.delims)
1622 1622 if not matches:
1623 1623 return matches
1624 1624
1625 1625 # get the cursor position of
1626 1626 # - the text being completed
1627 1627 # - the start of the key text
1628 1628 # - the start of the completion
1629 1629 text_start = len(self.text_until_cursor) - len(text)
1630 1630 if prefix:
1631 1631 key_start = match.start(2)
1632 1632 completion_start = key_start + token_offset
1633 1633 else:
1634 1634 key_start = completion_start = match.end()
1635 1635
1636 1636 # grab the leading prefix, to make sure all completions start with `text`
1637 1637 if text_start > key_start:
1638 1638 leading = ''
1639 1639 else:
1640 1640 leading = text[text_start:completion_start]
1641 1641
1642 1642 # the index of the `[` character
1643 1643 bracket_idx = match.end(1)
1644 1644
1645 1645 # append closing quote and bracket as appropriate
1646 1646 # this is *not* appropriate if the opening quote or bracket is outside
1647 1647 # the text given to this method
1648 1648 suf = ''
1649 1649 continuation = self.line_buffer[len(self.text_until_cursor):]
1650 1650 if key_start > text_start and closing_quote:
1651 1651 # quotes were opened inside text, maybe close them
1652 1652 if continuation.startswith(closing_quote):
1653 1653 continuation = continuation[len(closing_quote):]
1654 1654 else:
1655 1655 suf += closing_quote
1656 1656 if bracket_idx > text_start:
1657 1657 # brackets were opened inside text, maybe close them
1658 1658 if not continuation.startswith(']'):
1659 1659 suf += ']'
1660 1660
1661 1661 return [leading + k + suf for k in matches]
1662 1662
1663 1663 def unicode_name_matches(self, text):
1664 1664 u"""Match Latex-like syntax for unicode characters base
1665 1665 on the name of the character.
1666 1666
1667 1667 This does ``\\GREEK SMALL LETTER ETA`` -> ``Ξ·``
1668 1668
1669 1669 Works only on valid python 3 identifier, or on combining characters that
1670 1670 will combine to form a valid identifier.
1671 1671
1672 1672 Used on Python 3 only.
1673 1673 """
1674 1674 slashpos = text.rfind('\\')
1675 1675 if slashpos > -1:
1676 1676 s = text[slashpos+1:]
1677 1677 try :
1678 1678 unic = unicodedata.lookup(s)
1679 1679 # allow combining chars
1680 1680 if ('a'+unic).isidentifier():
1681 1681 return '\\'+s,[unic]
1682 1682 except KeyError:
1683 1683 pass
1684 1684 return u'', []
1685 1685
1686 1686
1687 1687 def latex_matches(self, text):
1688 1688 u"""Match Latex syntax for unicode characters.
1689 1689
1690 1690 This does both ``\\alp`` -> ``\\alpha`` and ``\\alpha`` -> ``Ξ±``
1691 1691
1692 1692 Used on Python 3 only.
1693 1693 """
1694 1694 slashpos = text.rfind('\\')
1695 1695 if slashpos > -1:
1696 1696 s = text[slashpos:]
1697 1697 if s in latex_symbols:
1698 1698 # Try to complete a full latex symbol to unicode
1699 1699 # \\alpha -> Ξ±
1700 1700 return s, [latex_symbols[s]]
1701 1701 else:
1702 1702 # If a user has partially typed a latex symbol, give them
1703 1703 # a full list of options \al -> [\aleph, \alpha]
1704 1704 matches = [k for k in latex_symbols if k.startswith(s)]
1705 1705 return s, matches
1706 1706 return u'', []
1707 1707
    def dispatch_custom_completer(self, text):
        """Dispatch completion to user-registered custom completers.

        Returns the first non-empty list of matches produced by a custom
        completer (case-sensitive matches preferred), or None when no
        custom completer handled ``text``.
        """
        if not self.custom_completers:
            return

        line = self.line_buffer
        if not line.strip():
            return None

        # Create a little structure to pass all the relevant information about
        # the current completion to any custom completer.
        event = SimpleNamespace()
        event.line = line
        event.symbol = text
        cmd = line.split(None,1)[0]
        event.command = cmd
        event.text_until_cursor = self.text_until_cursor

        # for foo etc, try also to find completer for %foo
        if not cmd.startswith(self.magic_escape):
            try_magic = self.custom_completers.s_matches(
                self.magic_escape + cmd)
        else:
            try_magic = []

        # try command-specific completers first, then magic-prefixed ones,
        # then the flat (pattern-based) completers
        for c in itertools.chain(self.custom_completers.s_matches(cmd),
                 try_magic,
                 self.custom_completers.flat_matches(self.text_until_cursor)):
            try:
                res = c(event)
                if res:
                    # first, try case sensitive match
                    withcase = [r for r in res if r.startswith(text)]
                    if withcase:
                        return withcase
                    # if none, then case insensitive ones are ok too
                    text_low = text.lower()
                    return [r for r in res if r.lower().startswith(text_low)]
            except TryNext:
                pass
            except KeyboardInterrupt:
                """
                If custom completer take too long,
                let keyboard interrupt abort and return nothing.
                """
                break

        return None
1755 1755
1756 1756 def completions(self, text: str, offset: int)->Iterator[Completion]:
1757 1757 """
1758 1758 Returns an iterator over the possible completions
1759 1759
1760 1760 .. warning:: Unstable
1761 1761
1762 1762 This function is unstable, API may change without warning.
1763 1763 It will also raise unless use in proper context manager.
1764 1764
1765 1765 Parameters
1766 1766 ----------
1767 1767
1768 1768 text:str
1769 1769 Full text of the current input, multi line string.
1770 1770 offset:int
1771 1771 Integer representing the position of the cursor in ``text``. Offset
1772 1772 is 0-based indexed.
1773 1773
1774 1774 Yields
1775 1775 ------
1776 1776 :any:`Completion` object
1777 1777
1778 1778
1779 1779 The cursor on a text can either be seen as being "in between"
1780 1780 characters or "On" a character depending on the interface visible to
1781 1781 the user. For consistency the cursor being on "in between" characters X
1782 1782 and Y is equivalent to the cursor being "on" character Y, that is to say
1783 1783 the character the cursor is on is considered as being after the cursor.
1784 1784
1785 1785 Combining characters may span more that one position in the
1786 1786 text.
1787 1787
1788 1788
1789 1789 .. note::
1790 1790
1791 1791 If ``IPCompleter.debug`` is :any:`True` will yield a ``--jedi/ipython--``
1792 1792 fake Completion token to distinguish completion returned by Jedi
1793 1793 and usual IPython completion.
1794 1794
1795 1795 .. note::
1796 1796
1797 1797 Completions are not completely deduplicated yet. If identical
1798 1798 completions are coming from different sources this function does not
1799 1799 ensure that each completion object will only be present once.
1800 1800 """
1801 1801 warnings.warn("_complete is a provisional API (as of IPython 6.0). "
1802 1802 "It may change without warnings. "
1803 1803 "Use in corresponding context manager.",
1804 1804 category=ProvisionalCompleterWarning, stacklevel=2)
1805 1805
1806 1806 seen = set()
1807 1807 try:
1808 1808 for c in self._completions(text, offset, _timeout=self.jedi_compute_type_timeout/1000):
1809 1809 if c and (c in seen):
1810 1810 continue
1811 1811 yield c
1812 1812 seen.add(c)
1813 1813 except KeyboardInterrupt:
1814 1814 """if completions take too long and users send keyboard interrupt,
1815 1815 do not crash and return ASAP. """
1816 1816 pass
1817 1817
    def _completions(self, full_text: str, offset: int, *, _timeout)->Iterator[Completion]:
        """
        Core completion module. Same signature as :any:`completions`, with the
        extra `timeout` parameter (in seconds).


        Computing jedi's completion ``.type`` can be quite expensive (it is a
        lazy property) and can require some warm-up, more warm up than just
        computing the ``name`` of a completion. The warm-up can be :

        - Long warm-up the first time a module is encountered after
        install/update: actually build parse/inference tree.

        - first time the module is encountered in a session: load tree from
        disk.

        We don't want to block completions for tens of seconds so we give the
        completer a "budget" of ``_timeout`` seconds per invocation to compute
        completions types, the completions that have not yet been computed will
        be marked as "unknown" and will have a chance to be computed next round
        as things get cached.

        Keep in mind that Jedi is not the only thing treating the completion so
        keep the timeout short-ish as if we take more than 0.3 second we still
        have lots of processing to do.

        """
        deadline = time.monotonic() + _timeout


        before = full_text[:offset]
        cursor_line, cursor_column = position_to_cursor(full_text, offset)

        matched_text, matches, matches_origin, jedi_matches = self._complete(
            full_text=full_text, cursor_line=cursor_line, cursor_pos=cursor_column)

        # Phase 1: yield jedi completions WITH their (expensive, lazy) .type,
        # until the time budget runs out.
        iter_jm = iter(jedi_matches)
        if _timeout:
            for jm in iter_jm:
                try:
                    type_ = jm.type
                except Exception:
                    if self.debug:
                        print("Error in Jedi getting type of ", jm)
                    type_ = None
                # start offset of the replacement: the completion text may
                # re-state characters already typed before the cursor.
                delta = len(jm.name_with_symbols) - len(jm.complete)
                if type_ == 'function':
                    signature = _make_signature(jm)
                else:
                    signature = ''
                yield Completion(start=offset - delta,
                                 end=offset,
                                 text=jm.name_with_symbols,
                                 type=type_,
                                 signature=signature,
                                 _origin='jedi')

                if time.monotonic() > deadline:
                    break

        # Phase 2: budget exhausted — flush the remaining jedi completions
        # without computing their type.
        for jm in iter_jm:
            delta = len(jm.name_with_symbols) - len(jm.complete)
            yield Completion(start=offset - delta,
                             end=offset,
                             text=jm.name_with_symbols,
                             type='<unknown>',  # don't compute type for speed
                             _origin='jedi',
                             signature='')


        start_offset = before.rfind(matched_text)

        # TODO:
        # Suppress this, right now just for debug.
        if jedi_matches and matches and self.debug:
            yield Completion(start=start_offset, end=offset, text='--jedi/ipython--',
                             _origin='debug', type='none', signature='')

        # I'm unsure if this is always true, so let's assert and see if it
        # crash
        assert before.endswith(matched_text)
        # Finally, the non-jedi (classic IPython) matches, tagged with the
        # matcher that produced each of them.
        for m, t in zip(matches, matches_origin):
            yield Completion(start=start_offset, end=offset, text=m, _origin=t, signature='', type='<unknown>')
1901 1901
1902 1902
1903 1903 def complete(self, text=None, line_buffer=None, cursor_pos=None):
1904 1904 """Find completions for the given text and line context.
1905 1905
1906 1906 Note that both the text and the line_buffer are optional, but at least
1907 1907 one of them must be given.
1908 1908
1909 1909 Parameters
1910 1910 ----------
1911 1911 text : string, optional
1912 1912 Text to perform the completion on. If not given, the line buffer
1913 1913 is split using the instance's CompletionSplitter object.
1914 1914
1915 1915 line_buffer : string, optional
1916 1916 If not given, the completer attempts to obtain the current line
1917 1917 buffer via readline. This keyword allows clients which are
1918 1918 requesting for text completions in non-readline contexts to inform
1919 1919 the completer of the entire text.
1920 1920
1921 1921 cursor_pos : int, optional
1922 1922 Index of the cursor in the full line buffer. Should be provided by
1923 1923 remote frontends where kernel has no access to frontend state.
1924 1924
1925 1925 Returns
1926 1926 -------
1927 1927 text : str
1928 1928 Text that was actually used in the completion.
1929 1929
1930 1930 matches : list
1931 1931 A list of completion matches.
1932 1932
1933 1933
1934 1934 .. note::
1935 1935
1936 1936 This API is likely to be deprecated and replaced by
1937 1937 :any:`IPCompleter.completions` in the future.
1938 1938
1939 1939
1940 1940 """
1941 1941 warnings.warn('`Completer.complete` is pending deprecation since '
1942 1942 'IPython 6.0 and will be replaced by `Completer.completions`.',
1943 1943 PendingDeprecationWarning)
1944 1944 # potential todo, FOLD the 3rd throw away argument of _complete
1945 1945 # into the first 2 one.
1946 1946 return self._complete(line_buffer=line_buffer, cursor_pos=cursor_pos, text=text, cursor_line=0)[:2]
1947 1947
    def _complete(self, *, cursor_line, cursor_pos, line_buffer=None, text=None,
                  full_text=None) -> Tuple[str, List[str], List[str], Iterable[_FakeJediCompletion]]:
        """

        Like complete but can also returns raw jedi completions as well as the
        origin of the completion text. This could (and should) be made much
        cleaner but that will be simpler once we drop the old (and stateful)
        :any:`complete` API.


        With current provisional API, cursor_pos act both (depending on the
        caller) as the offset in the ``text`` or ``line_buffer``, or as the
        ``column`` when passing multiline strings this could/should be renamed
        but would add extra noise.

        Returns
        -------
        A 4-tuple ``(matched_text, matches, matches_origin, jedi_matches)``.
        """

        # if the cursor position isn't given, the only sane assumption we can
        # make is that it's at the end of the line (the common case)
        if cursor_pos is None:
            cursor_pos = len(line_buffer) if text is None else len(text)

        if self.use_main_ns:
            self.namespace = __main__.__dict__

        # if text is either None or an empty string, rely on the line buffer
        if (not line_buffer) and full_text:
            line_buffer = full_text.split('\n')[cursor_line]
        if not text:
            text = self.splitter.split_line(line_buffer, cursor_pos)

        # Latex/unicode completions short-circuit everything else: they
        # return as soon as one of them matches.
        if self.backslash_combining_completions:
            # allow deactivation of these on windows.
            base_text = text if not line_buffer else line_buffer[:cursor_pos]
            latex_text, latex_matches = self.latex_matches(base_text)
            if latex_matches:
                return latex_text, latex_matches, ['latex_matches']*len(latex_matches), ()
            name_text = ''
            name_matches = []
            for meth in (self.unicode_name_matches, back_latex_name_matches, back_unicode_name_matches):
                name_text, name_matches = meth(base_text)
                if name_text:
                    return name_text, name_matches[:MATCHES_LIMIT], \
                           [meth.__qualname__]*min(len(name_matches), MATCHES_LIMIT), ()


        # If no line buffer is given, assume the input text is all there was
        if line_buffer is None:
            line_buffer = text

        self.line_buffer = line_buffer
        self.text_until_cursor = self.line_buffer[:cursor_pos]

        # Do magic arg matches
        for matcher in self.magic_arg_matchers:
            matches = list(matcher(line_buffer))[:MATCHES_LIMIT]
            if matches:
                origins = [matcher.__qualname__] * len(matches)
                return text, matches, origins, ()

        # Start with a clean slate of completions
        matches = []
        custom_res = self.dispatch_custom_completer(text)
        # FIXME: we should extend our api to return a dict with completions for
        # different types of objects. The rlcomplete() method could then
        # simply collapse the dict into a list for readline, but we'd have
        # richer completion semantics in other environments.
        completions = ()
        if self.use_jedi:
            if not full_text:
                full_text = line_buffer
            completions = self._jedi_matches(
                cursor_pos, cursor_line, full_text)
        if custom_res is not None:
            # did custom completers produce something?
            matches = [(m, 'custom') for m in custom_res]
        else:
            # Extend the list of completions with the results of each
            # matcher, so we return results to the user from all
            # namespaces.
            if self.merge_completions:
                matches = []
                for matcher in self.matchers:
                    try:
                        matches.extend([(m, matcher.__qualname__)
                                        for m in matcher(text)])
                    except:
                        # Show the ugly traceback if the matcher causes an
                        # exception, but do NOT crash the kernel!
                        sys.excepthook(*sys.exc_info())
            else:
                for matcher in self.matchers:
                    matches = [(m, matcher.__qualname__)
                               for m in matcher(text)]
                    if matches:
                        break
        # Deduplicate on the completion text, first occurrence wins, then
        # sort and cap the result list.
        seen = set()
        filtered_matches = set()
        for m in matches:
            t, c = m
            if t not in seen:
                filtered_matches.add(m)
                seen.add(t)

        _filtered_matches = sorted(
            set(filtered_matches), key=lambda x: completions_sorting_key(x[0]))\
            [:MATCHES_LIMIT]

        _matches = [m[0] for m in _filtered_matches]
        origins = [m[1] for m in _filtered_matches]

        self.matches = _matches

        return text, _matches, origins, completions
@@ -1,645 +1,645 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Pdb debugger class.
4 4
5 5 Modified from the standard pdb.Pdb class to avoid including readline, so that
6 6 the command line completion of other programs which include this isn't
7 7 damaged.
8 8
9 9 In the future, this class will be expanded with improvements over the standard
10 10 pdb.
11 11
12 12 The code in this file is mainly lifted out of cmd.py in Python 2.2, with minor
13 13 changes. Licensing should therefore be under the standard Python terms. For
14 14 details on the PSF (Python Software Foundation) standard license, see:
15 15
16 16 https://docs.python.org/2/license.html
17 17 """
18 18
19 19 #*****************************************************************************
20 20 #
21 21 # This file is licensed under the PSF license.
22 22 #
23 23 # Copyright (C) 2001 Python Software Foundation, www.python.org
24 24 # Copyright (C) 2005-2006 Fernando Perez. <fperez@colorado.edu>
25 25 #
26 26 #
27 27 #*****************************************************************************
28 28
29 29 import bdb
30 30 import functools
31 31 import inspect
32 32 import linecache
33 33 import sys
34 34 import warnings
35 35 import re
36 36
37 37 from IPython import get_ipython
38 38 from IPython.utils import PyColorize
39 39 from IPython.utils import coloransi, py3compat
40 40 from IPython.core.excolors import exception_colors
41 41 from IPython.testing.skipdoctest import skip_doctest
42 42
43 43
44 44 prompt = 'ipdb> '
45 45
46 46 #We have to check this directly from sys.argv, config struct not yet available
47 47 from pdb import Pdb as OldPdb
48 48
49 49 # Allow the set_trace code to operate outside of an ipython instance, even if
50 50 # it does so with some limitations. The rest of this support is implemented in
51 51 # the Tracer constructor.
52 52
def make_arrow(pad):
    """generate the leading arrow in front of traceback or debugger"""
    if pad >= 2:
        # Fill the width with dashes, reserving two columns for '> '.
        return '{}> '.format('-' * (pad - 2))
    if pad == 1:
        return '>'
    return ''
60 60
61 61
def BdbQuit_excepthook(et, ev, tb, excepthook=None):
    """Exception hook which handles `BdbQuit` exceptions.

    All other exceptions are processed using the `excepthook`
    parameter.
    """
    warnings.warn("`BdbQuit_excepthook` is deprecated since version 5.1",
                  DeprecationWarning, stacklevel=2)
    # Guard-clause form: BdbQuit just announces the debugger exit.
    if et == bdb.BdbQuit:
        print('Exiting Debugger.')
        return
    if excepthook is not None:
        excepthook(et, ev, tb)
        return
    # Backwards compatibility. Raise deprecation warning?
    BdbQuit_excepthook.excepthook_ori(et, ev, tb)
77 77
78 78
def BdbQuit_IPython_excepthook(self, et, ev, tb, tb_offset=None):
    """Deprecated IPython custom-exception handler for `BdbQuit`."""
    warnings.warn(
        "`BdbQuit_IPython_excepthook` is deprecated since version 5.1",
        DeprecationWarning, stacklevel=2)
    print('Exiting Debugger.')
84 84
85 85
class Tracer(object):
    """
    DEPRECATED

    Class for local debugging, similar to pdb.set_trace.

    Instances of this class, when called, behave like pdb.set_trace, but
    providing IPython's enhanced capabilities.

    This is implemented as a class which must be initialized in your own code
    and not as a standalone function because we need to detect at runtime
    whether IPython is already active or not. That detection is done in the
    constructor, ensuring that this code plays nicely with a running IPython,
    while functioning acceptably (though with limitations) if outside of it.
    """

    @skip_doctest
    def __init__(self, colors=None):
        """
        DEPRECATED

        Create a local debugger instance.

        Parameters
        ----------

        colors : str, optional
            The name of the color scheme to use, it must be one of IPython's
            valid color schemes. If not given, the function will default to
            the current IPython scheme when running inside IPython, and to
            'NoColor' otherwise.

        Examples
        --------
        ::

            from IPython.core.debugger import Tracer; debug_here = Tracer()

        Later in your code::

            debug_here() # -> will open up the debugger at that point.

        Once the debugger activates, you can use all of its regular commands to
        step through code, set breakpoints, etc.  See the pdb documentation
        from the Python standard library for usage details.
        """
        warnings.warn("`Tracer` is deprecated since version 5.1, directly use "
                      "`IPython.core.debugger.Pdb.set_trace()`",
                      DeprecationWarning, stacklevel=2)

        ip = get_ipython()
        if ip is None:
            # Outside of ipython, we set our own exception hook manually
            sys.excepthook = functools.partial(BdbQuit_excepthook,
                                               excepthook=sys.excepthook)
            def_colors = 'NoColor'
        else:
            # In ipython, we use its custom exception handler mechanism
            def_colors = ip.colors
            ip.set_custom_exc((bdb.BdbQuit,), BdbQuit_IPython_excepthook)

        if colors is None:
            colors = def_colors

        # The stdlib debugger internally uses a modified repr from the `repr`
        # module, that limits the length of printed strings to a hardcoded
        # limit of 30 characters. That much trimming is too aggressive, let's
        # at least raise that limit to 80 chars, which should be enough for
        # most interactive uses.
        try:
            try:
                from reprlib import aRepr  # Py 3
            except ImportError:
                from repr import aRepr  # Py 2
            aRepr.maxstring = 80
        except:
            # This is only a user-facing convenience, so any error we encounter
            # here can be warned about but can be otherwise ignored.  These
            # printouts will tell us about problems if this API changes
            import traceback
            traceback.print_exc()

        # The actual debugger the instance delegates to when called.
        self.debugger = Pdb(colors)

    def __call__(self):
        """Starts an interactive debugger at the point where called.

        This is similar to the pdb.set_trace() function from the std lib, but
        using IPython's enhanced debugger."""

        # f_back: start debugging in the caller's frame, not in __call__.
        self.debugger.set_trace(sys._getframe().f_back)
177 177
178 178
# Whitespace that immediately follows a newline; raw string so `\n`/`\s`
# are regex escapes, not Python string escapes.
RGX_EXTRA_INDENT = re.compile(r'(?<=\n)\s+')


def strip_indentation(multiline_string):
    """Return *multiline_string* with all post-newline indentation removed."""
    return RGX_EXTRA_INDENT.sub('', multiline_string)
184 184
185 185
def decorate_fn_with_doc(new_fn, old_fn, additional_text=""):
    """Make new_fn have old_fn's doc string. This is particularly useful
    for the ``do_...`` commands that hook into the help system.
    Adapted from from a comp.lang.python posting
    by Duncan Booth."""
    def wrapper(*args, **kw):
        return new_fn(*args, **kw)
    doc = old_fn.__doc__
    if doc:
        # De-indent the borrowed docstring before appending the extra text.
        wrapper.__doc__ = strip_indentation(doc) + additional_text
    return wrapper
196 196
197 197
198 198 def _file_lines(fname):
199 199 """Return the contents of a named file as a list of lines.
200 200
201 201 This function never raises an IOError exception: if the file can't be
202 202 read, it simply returns an empty list."""
203 203
204 204 try:
205 205 outfile = open(fname)
206 206 except IOError:
207 207 return []
208 208 else:
209 209 out = outfile.readlines()
210 210 outfile.close()
211 211 return out
212 212
213 213
214 214 class Pdb(OldPdb):
215 215 """Modified Pdb class, does not load readline.
216 216
217 217 for a standalone version that uses prompt_toolkit, see
218 218 `IPython.terminal.debugger.TerminalPdb` and
219 219 `IPython.terminal.debugger.set_trace()`
220 220 """
221 221
    def __init__(self, color_scheme=None, completekey=None,
                 stdin=None, stdout=None, context=5):
        """Create an IPython-flavoured Pdb.

        Parameters
        ----------
        color_scheme : str, optional
            Deprecated; the shell's current color scheme is used instead.
        completekey, stdin, stdout
            Passed straight through to the stdlib ``pdb.Pdb`` constructor.
        context : int, optional
            Number of source lines to show around the current line; must
            coerce to a positive integer.
        """

        # Parent constructor:
        try:
            self.context = int(context)
            if self.context <= 0:
                raise ValueError("Context must be a positive integer")
        except (TypeError, ValueError):
            # Normalize any bad `context` (non-numeric, <= 0) to one error.
            raise ValueError("Context must be a positive integer")

        OldPdb.__init__(self, completekey, stdin, stdout)

        # IPython changes...
        self.shell = get_ipython()

        if self.shell is None:
            save_main = sys.modules['__main__']
            # No IPython instance running, we must create one
            from IPython.terminal.interactiveshell import \
                TerminalInteractiveShell
            self.shell = TerminalInteractiveShell.instance()
            # needed by any code which calls __import__("__main__") after
            # the debugger was entered. See also #9941.
            sys.modules['__main__'] = save_main

        if color_scheme is not None:
            warnings.warn(
                "The `color_scheme` argument is deprecated since version 5.1",
                DeprecationWarning, stacklevel=2)
        else:
            color_scheme = self.shell.colors

        self.aliases = {}

        # Create color table: we copy the default one from the traceback
        # module and add a few attributes needed for debugging
        self.color_scheme_table = exception_colors()

        # shorthands
        C = coloransi.TermColors
        cst = self.color_scheme_table

        cst['NoColor'].colors.prompt = C.NoColor
        cst['NoColor'].colors.breakpoint_enabled = C.NoColor
        cst['NoColor'].colors.breakpoint_disabled = C.NoColor

        cst['Linux'].colors.prompt = C.Green
        cst['Linux'].colors.breakpoint_enabled = C.LightRed
        cst['Linux'].colors.breakpoint_disabled = C.Red

        cst['LightBG'].colors.prompt = C.Blue
        cst['LightBG'].colors.breakpoint_enabled = C.LightRed
        cst['LightBG'].colors.breakpoint_disabled = C.Red

        cst['Neutral'].colors.prompt = C.Blue
        cst['Neutral'].colors.breakpoint_enabled = C.LightRed
        cst['Neutral'].colors.breakpoint_disabled = C.Red


        # Add a python parser so we can syntax highlight source while
        # debugging.
        self.parser = PyColorize.Parser(style=color_scheme)
        self.set_colors(color_scheme)

        # Set the prompt - the default prompt is '(Pdb)'
        self.prompt = prompt
289 289
    def set_colors(self, scheme):
        """Shorthand access to the color table scheme selector method."""
        self.color_scheme_table.set_active_scheme(scheme)
        self.parser.style = scheme

    def interaction(self, frame, traceback):
        # Wrap the stdlib interaction loop so Ctrl-C prints the exception
        # summary instead of propagating out of the debugger.
        try:
            OldPdb.interaction(self, frame, traceback)
        except KeyboardInterrupt:
            sys.stdout.write('\n' + self.shell.get_exception_only())

    def new_do_up(self, arg):
        # Delegate to stdlib `up`; the docstring is copied onto the alias
        # below via decorate_fn_with_doc so `help u` still works.
        OldPdb.do_up(self, arg)
    do_u = do_up = decorate_fn_with_doc(new_do_up, OldPdb.do_up)

    def new_do_down(self, arg):
        # Delegate to stdlib `down` (see note on new_do_up).
        OldPdb.do_down(self, arg)

    do_d = do_down = decorate_fn_with_doc(new_do_down, OldPdb.do_down)

    def new_do_frame(self, arg):
        # Delegate to stdlib `frame`.
        OldPdb.do_frame(self, arg)

    def new_do_quit(self, arg):

        # Restore the completer state saved before entering the debugger,
        # if any, so quitting does not leave the shell's completions broken.
        if hasattr(self, 'old_all_completions'):
            self.shell.Completer.all_completions=self.old_all_completions

        return OldPdb.do_quit(self, arg)

    do_q = do_quit = decorate_fn_with_doc(new_do_quit, OldPdb.do_quit)

    def new_do_restart(self, arg):
        """Restart command. In the context of ipython this is exactly the same
        thing as 'quit'."""
        self.msg("Restart doesn't make sense here. Using 'quit' instead.")
        return self.do_quit(arg)

    def print_stack_trace(self, context=None):
        # Print every frame of the current stack, `context` lines each.
        if context is None:
            context = self.context
        try:
            context=int(context)
            if context <= 0:
                raise ValueError("Context must be a positive integer")
        except (TypeError, ValueError):
            raise ValueError("Context must be a positive integer")
        try:
            for frame_lineno in self.stack:
                self.print_stack_entry(frame_lineno, context=context)
        except KeyboardInterrupt:
            pass

    def print_stack_entry(self,frame_lineno, prompt_prefix='\n-> ',
                          context=None):
        if context is None:
            context = self.context
        try:
            context=int(context)
            if context <= 0:
                raise ValueError("Context must be a positive integer")
        except (TypeError, ValueError):
            raise ValueError("Context must be a positive integer")
        print(self.format_stack_entry(frame_lineno, '', context))

        # vds: >>
        # Let an attached editor (if any) follow the debugger's position.
        frame, lineno = frame_lineno
        filename = frame.f_code.co_filename
        self.shell.hooks.synchronize_with_editor(filename, lineno, 0)
        # vds: <<
360 360
    def format_stack_entry(self, frame_lineno, lprefix=': ', context=None):
        """Return a colorized, multi-line description of one stack entry.

        ``frame_lineno`` is a ``(frame, lineno)`` pair; ``context`` is the
        number of source lines to include around ``lineno``.
        """
        if context is None:
            context = self.context
        try:
            context=int(context)
            if context <= 0:
                print("Context must be a positive integer")
        except (TypeError, ValueError):
            # NOTE(review): unlike print_stack_trace/print_stack_entry this
            # only *prints* and keeps going, so a non-numeric `context` will
            # fail later at `context//2` — confirm whether this should raise.
            print("Context must be a positive integer")
        try:
            import reprlib  # Py 3
        except ImportError:
            import repr as reprlib  # Py 2

        ret = []

        Colors = self.color_scheme_table.active_colors
        ColorsNormal = Colors.Normal
        tpl_link = u'%s%%s%s' % (Colors.filenameEm, ColorsNormal)
        tpl_call = u'%s%%s%s%%s%s' % (Colors.vName, Colors.valEm, ColorsNormal)
        tpl_line = u'%%s%s%%s %s%%s' % (Colors.lineno, ColorsNormal)
        tpl_line_em = u'%%s%s%%s %s%%s%s' % (Colors.linenoEm, Colors.line,
                                             ColorsNormal)

        frame, lineno = frame_lineno

        return_value = ''
        if '__return__' in frame.f_locals:
            rv = frame.f_locals['__return__']
            #return_value += '->'
            return_value += reprlib.repr(rv) + '\n'
        ret.append(return_value)

        #s = filename + '(' + `lineno` + ')'
        filename = self.canonic(frame.f_code.co_filename)
        link = tpl_link % py3compat.cast_unicode(filename)

        if frame.f_code.co_name:
            func = frame.f_code.co_name
        else:
            func = "<lambda>"

        call = ''
        if func != '?':
            if '__args__' in frame.f_locals:
                args = reprlib.repr(frame.f_locals['__args__'])
            else:
                args = '()'
            call = tpl_call % (func, args)

        # The level info should be generated in the same format pdb uses, to
        # avoid breaking the pdbtrack functionality of python-mode in *emacs.
        if frame is self.curframe:
            ret.append('> ')
        else:
            ret.append('  ')
        ret.append(u'%s(%s)%s\n' % (link,lineno,call))

        # Window of `context` lines centered (roughly) on `lineno`, clamped
        # to the file boundaries.
        start = lineno - 1 - context//2
        lines = linecache.getlines(filename)
        start = min(start, len(lines) - context)
        start = max(start, 0)
        lines = lines[start : start + context]

        for i,line in enumerate(lines):
            show_arrow = (start + 1 + i == lineno)
            linetpl = (frame is self.curframe or show_arrow) \
                      and tpl_line_em \
                      or tpl_line
            ret.append(self.__format_line(linetpl, filename,
                                          start + 1 + i, line,
                                          arrow = show_arrow) )
        return ''.join(ret)

    def __format_line(self, tpl_line, filename, lineno, line, arrow = False):
        """Render one source line: syntax highlighting, breakpoint marker,
        right-aligned line number, and (optionally) the leading arrow."""
        bp_mark = ""
        bp_mark_color = ""

        new_line, err = self.parser.format2(line, 'str')
        if not err:
            line = new_line

        bp = None
        if lineno in self.get_file_breaks(filename):
            bps = self.get_breaks(filename, lineno)
            bp = bps[-1]

        if bp:
            Colors = self.color_scheme_table.active_colors
            bp_mark = str(bp.number)
            bp_mark_color = Colors.breakpoint_enabled
            if not bp.enabled:
                bp_mark_color = Colors.breakpoint_disabled

        numbers_width = 7
        if arrow:
            # This is the line with the error
            pad = numbers_width - len(str(lineno)) - len(bp_mark)
            num = '%s%s' % (make_arrow(pad), str(lineno))
        else:
            num = '%*s' % (numbers_width - len(bp_mark), str(lineno))

        return tpl_line % (bp_mark_color + bp_mark, num, line)
464 464
465 465
    def print_list_lines(self, filename, first, last):
        """The printing (as opposed to the parsing part of a 'list'
        command."""
        try:
            Colors = self.color_scheme_table.active_colors
            ColorsNormal = Colors.Normal
            tpl_line = '%%s%s%%s %s%%s' % (Colors.lineno, ColorsNormal)
            tpl_line_em = '%%s%s%%s %s%%s%s' % (Colors.linenoEm, Colors.line, ColorsNormal)
            src = []
            if filename == "<string>" and hasattr(self, "_exec_filename"):
                filename = self._exec_filename

            for lineno in range(first, last+1):
                line = linecache.getline(filename, lineno)
                if not line:
                    break

                if lineno == self.curframe.f_lineno:
                    # Current line gets the emphasized template + arrow.
                    line = self.__format_line(tpl_line_em, filename, lineno, line, arrow = True)
                else:
                    line = self.__format_line(tpl_line, filename, lineno, line, arrow = False)

                src.append(line)
                # Remember where listing stopped so a bare `list` continues.
                self.lineno = lineno

            print(''.join(src))

        except KeyboardInterrupt:
            pass

    def do_list(self, arg):
        """Print lines of code from the current stack frame
        """
        self.lastcmd = 'list'
        last = None
        if arg:
            try:
                # `arg` may be "first,last" (tuple) or a single line number.
                x = eval(arg, {}, {})
                if type(x) == type(()):
                    first, last = x
                    first = int(first)
                    last = int(last)
                    if last < first:
                        # Assume it's a count
                        last = first + last
                else:
                    first = max(1, int(x) - 5)
            except:
                # NOTE(review): bare except — any bad argument (including a
                # typo that raises NameError in eval) is reported the same way.
                print('*** Error in argument:', repr(arg))
                return
        elif self.lineno is None:
            first = max(1, self.curframe.f_lineno - 5)
        else:
            # Continue listing from where the previous `list` stopped.
            first = self.lineno + 1
        if last is None:
            last = first + 10
        self.print_list_lines(self.curframe.f_code.co_filename, first, last)

        # vds: >>
        # Keep an attached editor in sync with the listed region.
        lineno = first
        filename = self.curframe.f_code.co_filename
        self.shell.hooks.synchronize_with_editor(filename, lineno, 0)
        # vds: <<

    do_l = do_list

    def getsourcelines(self, obj):
        """Return (lines, first_lineno) of the source block for *obj*."""
        lines, lineno = inspect.findsource(obj)
        if inspect.isframe(obj) and obj.f_globals is obj.f_locals:
            # must be a module frame: do not try to cut a block out of it
            return lines, 1
        elif inspect.ismodule(obj):
            return lines, 1
        return inspect.getblock(lines[lineno:]), lineno+1

    def do_longlist(self, arg):
        """Print lines of code from the current stack frame.

        Shows more lines than 'list' does.
        """
        self.lastcmd = 'longlist'
        try:
            lines, lineno = self.getsourcelines(self.curframe)
        except OSError as err:
            self.error(err)
            return
        last = lineno + len(lines)
        self.print_list_lines(self.curframe.f_code.co_filename, lineno, last)
    do_ll = do_longlist
555 555
    def do_debug(self, arg):
        """debug code
        Enter a recursive debugger that steps through the code
        argument (which is an arbitrary expression or statement to be
        executed in the current environment).
        """
        # Suspend tracing while the nested debugger is constructed, then
        # run `arg` under a fresh debugger of the same class.
        sys.settrace(None)
        globals = self.curframe.f_globals
        locals = self.curframe_locals
        p = self.__class__(completekey=self.completekey,
                           stdin=self.stdin, stdout=self.stdout)
        p.use_rawinput = self.use_rawinput
        p.prompt = "(%s) " % self.prompt.strip()
        self.message("ENTERING RECURSIVE DEBUGGER")
        sys.call_tracing(p.run, (arg, globals, locals))
        self.message("LEAVING RECURSIVE DEBUGGER")
        # Restore this debugger's tracing once the nested one returns.
        sys.settrace(self.trace_dispatch)
        self.lastcmd = p.lastcmd

    def do_pdef(self, arg):
        """Print the call signature for any callable object.

        The debugger interface to %pdef"""
        namespaces = [('Locals', self.curframe.f_locals),
                      ('Globals', self.curframe.f_globals)]
        self.shell.find_line_magic('pdef')(arg, namespaces=namespaces)

    def do_pdoc(self, arg):
        """Print the docstring for an object.

        The debugger interface to %pdoc."""
        namespaces = [('Locals', self.curframe.f_locals),
                      ('Globals', self.curframe.f_globals)]
        self.shell.find_line_magic('pdoc')(arg, namespaces=namespaces)

    def do_pfile(self, arg):
        """Print (or run through pager) the file where an object is defined.

        The debugger interface to %pfile.
        """
        namespaces = [('Locals', self.curframe.f_locals),
                      ('Globals', self.curframe.f_globals)]
        self.shell.find_line_magic('pfile')(arg, namespaces=namespaces)

    def do_pinfo(self, arg):
        """Provide detailed information about an object.

        The debugger interface to %pinfo, i.e., obj?."""
        namespaces = [('Locals', self.curframe.f_locals),
                      ('Globals', self.curframe.f_globals)]
        self.shell.find_line_magic('pinfo')(arg, namespaces=namespaces)

    def do_pinfo2(self, arg):
        """Provide extra detailed information about an object.

        The debugger interface to %pinfo2, i.e., obj??."""
        namespaces = [('Locals', self.curframe.f_locals),
                      ('Globals', self.curframe.f_globals)]
        self.shell.find_line_magic('pinfo2')(arg, namespaces=namespaces)

    def do_psource(self, arg):
        """Print (or run through pager) the source code for an object."""
        namespaces = [('Locals', self.curframe.f_locals),
                      ('Globals', self.curframe.f_globals)]
        self.shell.find_line_magic('psource')(arg, namespaces=namespaces)

    def do_where(self, arg):
        """w(here)
        Print a stack trace, with the most recent frame at the bottom.
        An arrow indicates the "current frame", which determines the
        context of most commands. 'bt' is an alias for this command.

        Take a number as argument as an (optional) number of context line to
        print"""
        # NOTE(review): int(arg) will raise ValueError on a non-numeric
        # argument — confirm whether that should be reported like do_list.
        if arg:
            context = int(arg)
            self.print_stack_trace(context)
        else:
            self.print_stack_trace()

    do_w = do_where
637 637
638 638
def set_trace(frame=None):
    """
    Start debugging from `frame`.

    If frame is not specified, debugging starts from caller's frame.
    """
    # Default to the caller's frame, one level above this helper.
    target = frame if frame is not None else sys._getframe().f_back
    Pdb().set_trace(target)
@@ -1,766 +1,766 b''
1 1 """Input handling and transformation machinery.
2 2
3 3 The first class in this module, :class:`InputSplitter`, is designed to tell when
4 4 input from a line-oriented frontend is complete and should be executed, and when
5 5 the user should be prompted for another line of code instead. The name 'input
6 6 splitter' is largely for historical reasons.
7 7
8 8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 9 with full support for the extended IPython syntax (magics, system calls, etc).
10 10 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
11 11 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
12 12 and stores the results.
13 13
14 14 For more details, see the class docstrings below.
15 15 """
16 16
17 17 # Copyright (c) IPython Development Team.
18 18 # Distributed under the terms of the Modified BSD License.
19 19 import ast
20 20 import codeop
21 21 import io
22 22 import re
23 23 import sys
24 24 import tokenize
25 25 import warnings
26 26
27 27 from IPython.utils.py3compat import cast_unicode
28 28 from IPython.core.inputtransformer import (leading_indent,
29 29 classic_prompt,
30 30 ipy_prompt,
31 31 cellmagic,
32 32 assemble_logical_lines,
33 33 help_end,
34 34 escaped_commands,
35 35 assign_from_magic,
36 36 assign_from_system,
37 37 assemble_python_lines,
38 38 )
39 39
40 40 # These are available in this module for backwards compatibility.
41 41 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
42 42 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
43 43 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
44 44
45 45 #-----------------------------------------------------------------------------
46 46 # Utilities
47 47 #-----------------------------------------------------------------------------
48 48
49 49 # FIXME: These are general-purpose utilities that later can be moved to the
50 50 # general ward. Kept here for now because we're being very strict about test
51 51 # coverage with this code, and this lets us ensure that we keep 100% coverage
52 52 # while developing.
53 53
# compiled regexps for autoindent management
# Matches a line whose statement (raise/return/pass/break/continue) normally
# ends a block, so the autoindenter should dedent the next line.
dedent_re = re.compile('|'.join([
    r'^\s+raise(\s.*)?$', # raise statement (+ space + other stuff, maybe)
    r'^\s+raise\([^\)]*\).*$', # wacky raise with immediate open paren
    r'^\s+return(\s.*)?$', # normal return (+ space + other stuff, maybe)
    r'^\s+return\([^\)]*\).*$', # wacky return with immediate open paren
    r'^\s+pass\s*$', # pass (optionally followed by trailing spaces)
    r'^\s+break\s*$', # break (optionally followed by trailing spaces)
    r'^\s+continue\s*$', # continue (optionally followed by trailing spaces)
    ]))
# Leading whitespace on a line; newline is deliberately excluded.
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
65 65
# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments
# Raw string avoids the invalid escape sequence '\#' that is deprecated in
# ordinary string literals (DeprecationWarning, future SyntaxError).
comment_line_re = re.compile(r'^\s*\#')
69 69
70 70
def num_ini_spaces(s):
    """Return the number of initial spaces in a string.

    Note that tabs are counted as a single space. For now, we do *not* support
    mixing of tabs and spaces in the user's input.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
    """
    # Scan leading characters directly; the accepted set mirrors the
    # module-level pattern [ \t\r\f\v] (newline intentionally excluded).
    n = 0
    for ch in s:
        if ch not in ' \t\r\f\v':
            break
        n += 1
    return n
91 91
# Fake token types for partial_tokenize:
# These extend tokenize's own numbering (N_TOKENS upward) so they can never
# collide with a real token type.
INCOMPLETE_STRING = tokenize.N_TOKENS
IN_MULTILINE_STATEMENT = tokenize.N_TOKENS + 1
95 95
96 96 # The 2 classes below have the same API as TokenInfo, but don't try to look up
97 97 # a token type name that they won't find.
class IncompleteString:
    """Stand-in token for a string literal left unterminated at end of input."""
    type = exact_type = INCOMPLETE_STRING

    def __init__(self, s, start, end, line):
        # Mirror the TokenInfo attribute names so callers can treat this
        # like an ordinary token.
        self.s, self.start, self.end, self.line = s, start, end, line
105 105
class InMultilineStatement:
    """Stand-in token marking input that ends inside a multiline statement."""
    type = exact_type = IN_MULTILINE_STATEMENT

    def __init__(self, pos, line):
        # There is no token text; start and end both point at the cut-off.
        self.s = ''
        self.start = self.end = pos
        self.line = line
112 112
def partial_tokens(s):
    """Iterate over tokens from a possibly-incomplete string of code.

    This adds two special token types: INCOMPLETE_STRING and
    IN_MULTILINE_STATEMENT. These can only occur as the last token yielded, and
    represent the two main ways for code to be incomplete.
    """
    readline = io.StringIO(s).readline
    # Seed with a dummy token so token.end is valid even if the generator
    # raises before yielding anything.
    token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    try:
        for token in tokenize.generate_tokens(readline):
            yield token
    except tokenize.TokenError as e:
        # catch EOF error
        # tokenize gives no structured error info, so we classify the failure
        # by sniffing the message text in e.args[0].
        lines = s.splitlines(keepends=True)
        end = len(lines), len(lines[-1])
        if 'multi-line string' in e.args[0]:
            # Unterminated string: synthesize a token covering everything
            # from where the last real token ended to the end of the input.
            l, c = start = token.end
            s = lines[l-1][c:] + ''.join(lines[l:])
            yield IncompleteString(s, start, end, lines[-1])
        elif 'multi-line statement' in e.args[0]:
            # Open bracket/paren/brace: mark where the statement was cut off.
            yield InMultilineStatement(end, lines[-1])
        else:
            raise
137 137
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation

    Parameters
    ----------
    code : str
        A (possibly incomplete) block of Python source.

    Returns
    -------
    int
        The number of spaces the next line should be indented by.
    """
    tokens = list(partial_tokens(code))
    if tokens[-1].type == tokenize.ENDMARKER:
        tokens.pop()
    if not tokens:
        return 0
    # Strip trailing trivia so tokens[-1] is the last meaningful token.
    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}):
        tokens.pop()

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before
    prev_indents = [0]
    def _add_indent(n):
        # Record only changes of indentation level.
        if n != prev_indents[-1]:
            prev_indents.append(n)

    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
            _add_indent(tok.end[1])
        elif (tok.type == tokenize.NL):
            try:
                _add_indent(next(tokiter).start[1])
            except StopIteration:
                break

    last_indent = prev_indents.pop()

    # If we've just opened a multiline statement (e.g. 'a = ['), indent more
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        # tokens[-2] is the last real token before the synthetic marker.
        if tokens[-2].exact_type in {tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
197 197
198 198
def last_blank(src):
    """Determine if the input source ends in a blank.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.
    """
    # Empty input has no final line at all.
    if not src:
        return False
    final = src.splitlines()[-1]
    return final == '' or final.isspace()
212 212
213 213
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)

def last_two_blanks(src):
    """Determine if the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.
    """
    if not src:
        return False
    # The logic here is tricky: I couldn't get a regexp to work and pass all
    # the tests, so I took a different approach: split the source by lines,
    # grab the last two and prepend '###\n' as a stand-in for whatever was in
    # the body before the last two lines. Then, with that structure, it's
    # possible to analyze with two regexps. Not the most elegant solution, but
    # it works. If anyone tries to change this logic, make sure to validate
    # the whole test suite first!
    tail = src.splitlines()[-2:]
    probe = '\n'.join(['###\n'] + tail)
    return bool(last_two_blanks_re.match(probe) or
                last_two_blanks_re2.match(probe))
238 238
239 239
def remove_comments(src):
    """Remove all comments from input source.

    Note: comments are NOT recognized inside of strings!

    Parameters
    ----------
    src : string
        A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    # Purely textual: drop everything from each '#' to the end of its line.
    # A '#' inside a string literal is removed too (see the note above).
    comment = re.compile('#.*')
    return comment.sub('', src)
256 256
257 257
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # Some environments (pipes, frozen apps, replaced stdin objects) leave
    # sys.stdin.encoding unset or None; fall back to a safe default then.
    encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if encoding is None else encoding
268 268
269 269 #-----------------------------------------------------------------------------
270 270 # Classes and functions for normal Python syntax handling
271 271 #-----------------------------------------------------------------------------
272 272
class InputSplitter(object):
    r"""An object that can accumulate lines of Python source before execution.

    This object is designed to be fed python source line-by-line, using
    :meth:`push`. It will return on each push whether the currently pushed
    code could be executed already. In addition, it provides a method called
    :meth:`push_accepts_more` that can be used to query whether more input
    can be pushed into a single interactive block.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print 'Input source was:\n', isp.source_reset(),
    """
    # A cache for storing the current indentation
    # The first value stores the most recently processed source input
    # The second value is the number of spaces for the current indentation
    # If self.source matches the first value, the second value is a valid
    # current indentation. Otherwise, the cache is invalid and the indentation
    # must be recalculated.
    _indent_spaces_cache = None, None
    # String, indicating the default input encoding. It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source. It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Boolean indicating whether the current block is complete
    _is_complete = None
    # Boolean indicating whether the current block has an unrecoverable syntax error
    _is_invalid = False

    def __init__(self):
        """Create a new InputSplitter instance.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()

    def reset(self):
        """Reset the input buffer and associated state."""
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._is_invalid = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def check_complete(self, source):
        """Return whether a block of code is ready to execute, or should be continued

        This is a non-stateful API, and will reset the state of this InputSplitter.

        Parameters
        ----------
        source : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if source is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        self.reset()
        try:
            self.push(source)
        except SyntaxError:
            # Transformers in IPythonInputSplitter can raise SyntaxError,
            # which push() will not catch.
            return 'invalid', None
        else:
            if self._is_invalid:
                return 'invalid', None
            elif self.push_accepts_more():
                return 'incomplete', self.get_indent_spaces()
            else:
                return 'complete', None
        finally:
            # This API is stateless: always leave the splitter pristine for
            # the next caller, whichever branch returned above.
            self.reset()

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block. Note that
          this value is also stored as a private attribute (``_is_complete``), so it
          can be queried at any time.
        """
        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None
        self._is_invalid = False

        # Honor termination lines properly
        # An explicit trailing backslash continuation always means more input
        # is expected, so short-circuit before compiling.
        if source.endswith('\\\n'):
            return False

        try:
            with warnings.catch_warnings():
                # Treat SyntaxWarning as an error so suspicious-but-compilable
                # input is flagged invalid rather than silently accepted.
                warnings.simplefilter('error', SyntaxWarning)
                self.code = self._compile(source, symbol="exec")
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all. Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            self._is_complete = True
            self._is_invalid = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines. The InputSplitter considers it has a complete
        interactive block and will not accept more input when either:

        * A SyntaxError is raised

        * The code is complete and consists of a single line or a single
          non-compound statement

        * The code is complete and has a blank line at the end

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        # With incomplete input, unconditionally accept more
        # A syntax error also sets _is_complete to True - see push()
        if not self._is_complete:
            #print("Not complete") # debug
            return True

        # The user can make any (complete) input execute by leaving a blank line
        last_line = self.source.splitlines()[-1]
        if (not last_line) or last_line.isspace():
            #print("Blank line") # debug
            return False

        # If there's just a single line or AST node, and we're flush left, as is
        # the case after a simple statement such as 'a=1', we want to execute it
        # straight away.
        if self.get_indent_spaces() == 0:
            if len(self.source.splitlines()) <= 1:
                return False

            try:
                code_ast = ast.parse(u''.join(self._buffer))
            except Exception:
                #print("Can't parse AST") # debug
                return False
            else:
                if len(code_ast.body) == 1 and \
                                    not hasattr(code_ast.body[0], 'body'):
                    #print("Simple statement") # debug
                    return False

        # General fallback - accept more code
        return True

    def get_indent_spaces(self):
        """Return the indentation (in spaces) for the next line of input.

        The result is cached per source string (see _indent_spaces_cache)."""
        sourcefor, n = self._indent_spaces_cache
        if sourcefor == self.source:
            return n

        # self.source always has a trailing newline
        n = find_next_indent(self.source[:-1])
        self._indent_spaces_cache = (self.source, n)
        return n

    # Backwards compatibility. I think all code that used .indent_spaces was
    # inside IPython, but we can leave this here until IPython 7 in case any
    # other modules are using it. -TK, November 2017
    indent_spaces = property(get_indent_spaces)

    def _store(self, lines, buffer=None, store='source'):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended."""

        if buffer is None:
            buffer = self._buffer

        if lines.endswith('\n'):
            buffer.append(lines)
        else:
            buffer.append(lines+'\n')
        # Keep the named attribute (by default self.source) in sync with the
        # buffer contents.
        setattr(self, store, self._set_source(buffer))

    def _set_source(self, buffer):
        """Join the buffered lines into a single source string."""
        return u''.join(buffer)
524 524
525 525
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Flag to track when assemble_python_lines has stored input that it hasn't
    # given back yet.
    within_python_line = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, line_input_checker=True, physical_line_transforms=None,
                 logical_line_transforms=None, python_line_transforms=None):
        super(IPythonInputSplitter, self).__init__()
        self._buffer_raw = []
        self._validate = True

        # Each group of transformers may be overridden by the caller; the
        # defaults below implement IPython's standard syntax handling.
        if physical_line_transforms is not None:
            self.physical_line_transforms = physical_line_transforms
        else:
            self.physical_line_transforms = [
                                             leading_indent(),
                                             classic_prompt(),
                                             ipy_prompt(),
                                             cellmagic(end_on_blank_line=line_input_checker),
                                            ]

        self.assemble_logical_lines = assemble_logical_lines()
        if logical_line_transforms is not None:
            self.logical_line_transforms = logical_line_transforms
        else:
            self.logical_line_transforms = [
                                            help_end(),
                                            escaped_commands(),
                                            assign_from_magic(),
                                            assign_from_system(),
                                           ]

        self.assemble_python_lines = assemble_python_lines()
        if python_line_transforms is not None:
            self.python_line_transforms = python_line_transforms
        else:
            # We don't use any of these at present
            self.python_line_transforms = []

    @property
    def transforms(self):
        "Quick access to all transformers."
        return self.physical_line_transforms + \
            [self.assemble_logical_lines] + self.logical_line_transforms + \
            [self.assemble_python_lines] + self.python_line_transforms

    @property
    def transforms_in_use(self):
        """Transformers, excluding logical line transformers if we're in a
        Python line."""
        t = self.physical_line_transforms[:]
        if not self.within_python_line:
            t += [self.assemble_logical_lines] + self.logical_line_transforms
        return t + [self.assemble_python_lines] + self.python_line_transforms

    def reset(self):
        """Reset the input buffer and associated state."""
        super(IPythonInputSplitter, self).reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False
        self.within_python_line = False

        for t in self.transforms:
            try:
                t.reset()
            except SyntaxError:
                # Nothing that calls reset() expects to handle transformer
                # errors
                pass

    def flush_transformers(self):
        """Flush pending input out of every transformer into self.source."""
        def _flush(transform, outs):
            """yield transformed lines

            always strings, never None

            transform: the current transform
            outs: an iterable of previously transformed inputs.
                Each may be multiline, which will be passed
                one line at a time to transform.
            """
            for out in outs:
                for line in out.splitlines():
                    # push one line at a time
                    tmp = transform.push(line)
                    if tmp is not None:
                        yield tmp

            # reset the transform
            tmp = transform.reset()
            if tmp is not None:
                yield tmp

        # Chain the generators so each transformer consumes the previous
        # one's output; nothing runs until the list() below drains them.
        out = []
        for t in self.transforms_in_use:
            out = _flush(t, out)

        out = list(out)
        if out:
            self._store('\n'.join(out))

    def raw_reset(self):
        """Return raw input only and perform a full reset.
        """
        out = self.source_raw
        self.reset()
        return out

    def source_reset(self):
        """Return the transformed source and perform a full reset."""
        try:
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push_accepts_more(self):
        """Return whether more input can be pushed (see InputSplitter).

        Also True while any transformer is still accumulating lines."""
        if self.transformer_accumulating:
            return True
        else:
            return super(IPythonInputSplitter, self).push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.
        """
        self.reset()
        try:
            self.push(cell)
            self.flush_transformers()
            return self.source
        finally:
            self.reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block. Note that
          this value is also stored as a private attribute (_is_complete), so it
          can be queried at any time.
        """

        # We must ensure all input is pure unicode
        lines = cast_unicode(lines, self.encoding)
        # ''.splitlines() --> [], but we need to push the empty line to transformers
        lines_list = lines.splitlines()
        if not lines_list:
            lines_list = ['']

        # Store raw source before applying any transformations to it. Note
        # that this must be done *after* the reset() call that would otherwise
        # flush the buffer.
        self._store(lines, self._buffer_raw, 'source_raw')

        # Transform line by line; a transformer that is accumulating returns
        # None for a line, which is simply skipped here.
        transformed_lines_list = []
        for line in lines_list:
            transformed = self._transform_line(line)
            if transformed is not None:
                transformed_lines_list.append(transformed)

        if transformed_lines_list:
            transformed_lines = '\n'.join(transformed_lines_list)
            return super(IPythonInputSplitter, self).push(transformed_lines)
        else:
            # Got nothing back from transformers - they must be waiting for
            # more input.
            return False

    def _transform_line(self, line):
        """Push a line of input code through the various transformers.

        Returns any output from the transformers, or None if a transformer
        is accumulating lines.

        Sets self.transformer_accumulating as a side effect.
        """
        def _accumulating(dbg):
            #print(dbg)
            self.transformer_accumulating = True
            return None

        for transformer in self.physical_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        if not self.within_python_line:
            line = self.assemble_logical_lines.push(line)
            if line is None:
                return _accumulating('acc logical line')

            for transformer in self.logical_line_transforms:
                line = transformer.push(line)
                if line is None:
                    return _accumulating(transformer)

        line = self.assemble_python_lines.push(line)
        if line is None:
            self.within_python_line = True
            return _accumulating('acc python line')
        else:
            self.within_python_line = False

        for transformer in self.python_line_transforms:
            line = transformer.push(line)
            if line is None:
                return _accumulating(transformer)

        #print("transformers clear") #debug
        self.transformer_accumulating = False
        return line
766 766
@@ -1,534 +1,534 b''
1 1 """Input transformer classes to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5 """
6 6 import abc
7 7 import functools
8 8 import re
9 9 from io import StringIO
10 10
11 11 from IPython.core.splitinput import LineInfo
12 12 from IPython.utils import tokenize2
13 13 from IPython.utils.tokenize2 import generate_tokens, untokenize, TokenError
14 14
15 15 #-----------------------------------------------------------------------------
16 16 # Globals
17 17 #-----------------------------------------------------------------------------
18 18
# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL = '!' # Send line to underlying system shell
ESC_SH_CAP = '!!' # Send line to system shell and capture output
ESC_HELP = '?' # Find information about object
ESC_HELP2 = '??' # Find extra-detailed information about object
ESC_MAGIC = '%' # Call magic function
ESC_MAGIC2 = '%%' # Call cell-magic function
ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';' # Quote all args as a single string, call
ESC_PAREN = '/' # Call first argument with rest of line as arguments

# All escapes recognised by the dispatch machinery further below.
ESC_SEQUENCES = [ESC_SHELL, ESC_SH_CAP, ESC_HELP ,\
                 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,\
                 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN ]
38 38
39 39
class InputTransformer(metaclass=abc.ABCMeta):
    """Abstract base class for line-based input transformers."""

    @abc.abstractmethod
    def push(self, line):
        """Feed one line of input to the transformer.

        Returns the transformed input, or None while the transformer is
        waiting for more input.  Must be overridden by subclasses.

        Implementations may raise ``SyntaxError`` if the input is invalid;
        no other exceptions may be raised.
        """

    @abc.abstractmethod
    def reset(self):
        """Return any lines the transformer has accumulated (transformed),
        and reset its internal state.  Must be overridden by subclasses.
        """

    @classmethod
    def wrap(cls, func):
        """Decorator helper: return a factory that builds instances of *cls*
        wrapping the decorated object.
        """
        @functools.wraps(func)
        def transformer_factory(**kwargs):
            return cls(func, **kwargs)

        return transformer_factory
74 74
class StatelessInputTransformer(InputTransformer):
    """Adapt a plain ``line -> line`` function to the InputTransformer API."""

    def __init__(self, func):
        self.func = func

    def __repr__(self):
        return "StatelessInputTransformer(func={0!r})".format(self.func)

    def push(self, line):
        """Transform *line* immediately; this transformer never accumulates."""
        return self.func(line)

    def reset(self):
        """No-op - exists for compatibility."""
        return None
91 91
class CoroutineInputTransformer(InputTransformer):
    """Adapt a generator-based coroutine to the InputTransformer API."""

    def __init__(self, coro, **kwargs):
        self.coro = coro(**kwargs)
        # Advance to the first yield so .send() can be used immediately.
        next(self.coro)

    def __repr__(self):
        return "CoroutineInputTransformer(coro={0!r})".format(self.coro)

    def push(self, line):
        """Send *line* into the coroutine, returning the transformed input
        or None if the coroutine is waiting for more input.
        """
        return self.coro.send(line)

    def reset(self):
        """Flush accumulated input by sending None; returns whatever the
        coroutine hands back, and resets its internal state.
        """
        return self.coro.send(None)
114 114
class TokenInputTransformer(InputTransformer):
    """Wrapper for a token-based input transformer.

    func should accept a list of tokens (5-tuples, see tokenize docs), and
    return an iterable which can be passed to tokenize.untokenize().
    """
    def __init__(self, func):
        self.func = func
        # Pending input lines (each with trailing '\n') not yet consumed.
        self.buf = []
        self.reset_tokenizer()

    def reset_tokenizer(self):
        # Rebuild the tokenizer so it reads from the *current* buffer
        # contents; generate_tokens pulls lines via the iterator's __next__.
        it = iter(self.buf)
        self.tokenizer = generate_tokens(it.__next__)

    def push(self, line):
        """Buffer *line* and attempt to tokenize everything accumulated.

        Returns the transformed source when a complete statement is seen,
        or None while more input is required.
        """
        self.buf.append(line + '\n')
        if all(l.isspace() for l in self.buf):
            # Whitespace-only input: nothing to tokenize, just flush.
            return self.reset()

        tokens = []
        stop_at_NL = False
        try:
            for intok in self.tokenizer:
                tokens.append(intok)
                t = intok[0]
                if t == tokenize2.NEWLINE or (stop_at_NL and t == tokenize2.NL):
                    # Stop before we try to pull a line we don't have yet
                    break
                elif t == tokenize2.ERRORTOKEN:
                    # After a tokenize error, bail out at the next NL too.
                    stop_at_NL = True
        except TokenError:
            # Multi-line statement - stop and try again with the next line
            self.reset_tokenizer()
            return None

        return self.output(tokens)

    def output(self, tokens):
        # Apply func to the token stream and re-serialize; clears state.
        self.buf.clear()
        self.reset_tokenizer()
        return untokenize(self.func(tokens)).rstrip('\n')

    def reset(self):
        """Return any buffered raw input (untransformed) and clear state.

        Returns None implicitly when the buffer was empty.
        """
        l = ''.join(self.buf)
        self.buf.clear()
        self.reset_tokenizer()
        if l:
            return l.rstrip('\n')
164 164
class assemble_python_lines(TokenInputTransformer):
    """Accumulate input until it forms a complete Python statement."""

    def __init__(self):
        # No token-transforming callback is needed; only the buffering
        # behaviour of the base class is used.
        super().__init__(None)

    def output(self, tokens):
        # Hand back the raw buffered text instead of untokenizing.
        return self.reset()
171 171
@CoroutineInputTransformer.wrap
def assemble_logical_lines():
    r"""Join lines that end in explicit line continuations (\)"""
    line = ''
    while True:
        line = (yield line)
        if not line or line.isspace():
            continue

        pieces = []
        while line is not None:
            if line.endswith('\\') and not has_comment(line):
                # Trailing backslash outside a comment: keep collecting.
                pieces.append(line[:-1])
                line = (yield None)  # Get another line
            else:
                pieces.append(line)
                break

        # Emit the joined logical line on the next iteration.
        line = ''.join(pieces)
192 192
# Utilities
def _make_help_call(target, esc, lspace, next_input=None):
    """Prepares a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)"""
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'
    arg = " ".join([method, target])
    # run_line_magic wants the magic name and its argument string separately.
    t_magic_name, _, t_magic_arg_s = arg.partition(' ')
    t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
    if next_input is None:
        return '%sget_ipython().run_line_magic(%r, %r)' % (lspace, t_magic_name, t_magic_arg_s)
    return '%sget_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
        (lspace, next_input, t_magic_name, t_magic_arg_s)
209 209
# These define the transformations for the different escape characters.
def _tr_system(line_info):
    """Rewrite a ``!cmd`` line as a call to get_ipython().system."""
    cmd = line_info.line.lstrip().lstrip(ESC_SHELL)
    return '%sget_ipython().system(%r)' % (line_info.pre, cmd)
215 215
216 216 def _tr_system2(line_info):
217 217 "Translate lines escaped with: !!"
218 218 cmd = line_info.line.lstrip()[2:]
219 219 return '%sget_ipython().getoutput(%r)' % (line_info.pre, cmd)
220 220
def _tr_help(line_info):
    """Rewrite ``?obj``/``??obj`` lines as pinfo/pinfo2/psearch calls."""
    if not line_info.line[1:]:
        # A naked help line should just fire the intro help screen
        return 'get_ipython().show_usage()'
    return _make_help_call(line_info.ifun, line_info.esc, line_info.pre)
228 228
def _tr_magic(line_info):
    """Rewrite a ``%magic`` line as a run_line_magic call."""
    tpl = '%sget_ipython().run_line_magic(%r, %r)'
    if line_info.line.startswith(ESC_MAGIC2):
        # Cell magics (%%) are handled elsewhere; pass through unchanged.
        return line_info.line
    cmd = ' '.join([line_info.ifun, line_info.the_rest]).strip()
    # run_line_magic wants the magic name and its argument string separately.
    t_magic_name, _, t_magic_arg_s = cmd.partition(' ')
    t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
    return tpl % (line_info.pre, t_magic_name, t_magic_arg_s)
239 239
240 240 def _tr_quote(line_info):
241 241 "Translate lines escaped with: ,"
242 242 return '%s%s("%s")' % (line_info.pre, line_info.ifun,
243 243 '", "'.join(line_info.the_rest.split()) )
244 244
245 245 def _tr_quote2(line_info):
246 246 "Translate lines escaped with: ;"
247 247 return '%s%s("%s")' % (line_info.pre, line_info.ifun,
248 248 line_info.the_rest)
249 249
250 250 def _tr_paren(line_info):
251 251 "Translate lines escaped with: /"
252 252 return '%s%s(%s)' % (line_info.pre, line_info.ifun,
253 253 ", ".join(line_info.the_rest.split()))
254 254
# Dispatch table: escape sequence -> translator for that kind of line.
# Note both help escapes share one handler, which inspects the escape itself.
tr = { ESC_SHELL : _tr_system,
       ESC_SH_CAP : _tr_system2,
       ESC_HELP : _tr_help,
       ESC_HELP2 : _tr_help,
       ESC_MAGIC : _tr_magic,
       ESC_QUOTE : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN : _tr_paren }
263 263
@StatelessInputTransformer.wrap
def escaped_commands(line):
    """Transform escaped commands - %magic, !system, ?help + various autocalls.
    """
    if not line or line.isspace():
        return line
    info = LineInfo(line)
    # Dispatch on the escape character(s) found at the start of the line.
    handler = tr.get(info.esc)
    if handler is None:
        return line
    return handler(info)
275 275
# Matches (possibly empty) leading whitespace; used to preserve indentation.
_initial_space_re = re.compile(r'\s*')

# Matches e.g. "name.attr?", "%magic??" at the end of a line: an optionally
# %-prefixed dotted identifier followed by one or two question marks.
_help_end_re = re.compile(r"""(%{0,2}
                          [a-zA-Z_*][\w*]* # Variable name
                          (\.[a-zA-Z_*][\w*]*)* # .etc.etc
                          )
                          (\?\??)$ # ? or ??
                          """,
                          re.VERBOSE)

# Extra pseudotokens for multiline strings and data structures
# (sentinels added to the token-type set by _line_tokens below).
_MULTILINE_STRING = object()
_MULTILINE_STRUCTURE = object()
289 289
def _line_tokens(line):
    """Helper for has_comment and ends_in_comment_or_string."""
    readline = StringIO(line).readline
    toktypes = set()
    try:
        for tok in generate_tokens(readline):
            toktypes.add(tok[0])
    except TokenError as e:
        # TokenError is raised in exactly two cases: an unterminated
        # multi-line string, or an unclosed bracketed structure.
        if 'multi-line string' in e.args[0]:
            toktypes.add(_MULTILINE_STRING)
        else:
            toktypes.add(_MULTILINE_STRUCTURE)
    return toktypes
304 304
def has_comment(src):
    """Indicate whether an input line has (i.e. ends in, or is) a comment.

    This uses tokenize, so it can distinguish comments from # inside strings.

    Parameters
    ----------
    src : string
        A single line input string.

    Returns
    -------
    comment : bool
        True if source has a comment.
    """
    return tokenize2.COMMENT in _line_tokens(src)
321 321
def ends_in_comment_or_string(src):
    """Indicates whether or not an input line ends in a comment or within
    a multiline string.

    Parameters
    ----------
    src : string
        A single line input string.

    Returns
    -------
    comment : bool
        True if source ends in a comment or multiline string.
    """
    toktypes = _line_tokens(src)
    return tokenize2.COMMENT in toktypes or _MULTILINE_STRING in toktypes
338 338
339 339
@StatelessInputTransformer.wrap
def help_end(line):
    """Translate lines with ?/?? at the end"""
    match = _help_end_re.search(line)
    if match is None or ends_in_comment_or_string(line):
        return line
    target = match.group(1)
    esc = match.group(3)
    leading = _initial_space_re.match(line).group(0)

    # If we're mid-command, put it back on the next prompt for the user.
    if line.strip() != match.group(0):
        next_input = line.rstrip('?')
    else:
        next_input = None

    return _make_help_call(target, esc, leading, next_input)
354 354
355 355
@CoroutineInputTransformer.wrap
def cellmagic(end_on_blank_line=False):
    """Captures & transforms cell magics.

    After a cell magic is started, this stores up any lines it gets until it is
    reset (sent None).
    """
    tpl = 'get_ipython().run_cell_magic(%r, %r, %r)'
    cellmagic_help_re = re.compile(r'%%\w+\?')
    line = ''
    while True:
        line = (yield line)
        # consume leading empty lines
        while not line:
            line = (yield line)

        if not line.startswith(ESC_MAGIC2):
            # This isn't a cell magic, idle waiting for reset then start over
            while line is not None:
                line = (yield line)
            continue

        if cellmagic_help_re.match(line):
            # This case will be handled by help_end
            continue

        # First line holds "%%name args"; collect the body until reset
        # (or, optionally, until a blank line).
        first = line
        body = []
        line = (yield None)
        while (line is not None) and \
              ((line.strip() != '') or not end_on_blank_line):
            body.append(line)
            line = (yield None)

        # Output
        magic_name, _, first = first.partition(' ')
        magic_name = magic_name.lstrip(ESC_MAGIC2)
        line = tpl % (magic_name, first, u'\n'.join(body))
394 394
395 395
def _strip_prompts(prompt_re, initial_re=None, turnoff_re=None):
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
    turnoff_re : regular expression, optional
        If this matches an unprompted first line, prompt stripping is disabled
        for the rest of the cell (e.g. inside a cell magic).

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    if initial_re is None:
        initial_re = prompt_re
    line = ''
    while True:
        line = (yield line)

        # First line of cell
        if line is None:
            continue
        # n1 records whether the initial prompt matched on line one.
        out, n1 = initial_re.subn('', line, count=1)
        if turnoff_re and not n1:
            if turnoff_re.match(line):
                # We're in e.g. a cell magic; disable this transformer for
                # the rest of the cell.
                while line is not None:
                    line = (yield line)
                continue

        line = (yield out)

        if line is None:
            continue
        # check for any prompt on the second line of the cell,
        # because people often copy from just after the first prompt,
        # so we might not see it in the first line.
        out, n2 = prompt_re.subn('', line, count=1)
        line = (yield out)

        if n1 or n2:
            # Found a prompt in the first two lines - check for it in
            # the rest of the cell as well.
            while line is not None:
                line = (yield prompt_re.sub('', line, count=1))

        else:
            # Prompts not in input - wait for reset
            while line is not None:
                line = (yield line)
452 452
@CoroutineInputTransformer.wrap
def classic_prompt():
    """Strip the >>>/... prompts of the Python interactive shell."""
    # FIXME: non-capturing version (?:...) usable?
    prompt_re = re.compile(r'^(>>>|\.\.\.)( |$)')
    initial_re = re.compile(r'^>>>( |$)')
    # Any %magic/!system is IPython syntax, so we needn't look for >>> prompts
    turnoff_re = re.compile(r'^[%!]')
    return _strip_prompts(prompt_re, initial_re=initial_re,
                          turnoff_re=turnoff_re)
462 462
@CoroutineInputTransformer.wrap
def ipy_prompt():
    """Strip IPython's In [1]:/...: prompts."""
    # FIXME: non-capturing version (?:...) usable?
    prompt_re = re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)')
    # Disable prompt stripping inside cell magics
    turnoff_re = re.compile(r'^%%')
    return _strip_prompts(prompt_re, turnoff_re=turnoff_re)
471 471
472 472
@CoroutineInputTransformer.wrap
def leading_indent():
    """Remove leading indentation.

    If the first line starts with a spaces or tabs, the same whitespace will be
    removed from each following line until it is reset.
    """
    space_re = re.compile(r'^[ \t]+')
    line = ''
    while True:
        line = (yield line)

        if line is None:
            continue

        m = space_re.match(line)
        if m:
            # Remember the exact leading whitespace of the first line and
            # strip that same prefix from every subsequent line of the cell.
            space = m.group(0)
            while line is not None:
                if line.startswith(space):
                    line = line[len(space):]
                line = (yield line)
        else:
            # No leading spaces - wait for reset
            while line is not None:
                line = (yield line)
499 499
500 500
# Common left-hand-side pattern for "<targets> = <rhs>" assignments: one or
# more dotted identifiers (optionally *-unpacked), separated by commas.
_assign_pat = \
r'''(?P<lhs>(\s*)
    ([\w\.]+)                # Initial identifier
    (\s*,\s*
    \*?[\w\.]+)*             # Further identifiers for unpacking
    \s*?,?                   # Trailing comma
    )
    \s*=\s*
    '''

# Matches "files = !ls"-style lines; template builds the replacement call.
assign_system_re = re.compile(r'{}!\s*(?P<cmd>.*)'.format(_assign_pat), re.VERBOSE)
assign_system_template = '%s = get_ipython().getoutput(%r)'
@StatelessInputTransformer.wrap
def assign_from_system(line):
    """Transform assignment from system commands (e.g. files = !ls)"""
    match = assign_system_re.match(line)
    if match is None:
        return line
    # Replace the whole line with an assignment capturing the command output.
    return assign_system_template % match.group('lhs', 'cmd')
521 521
# Matches "a = %who_ls"-style lines; template builds the replacement call.
assign_magic_re = re.compile(r'{}%\s*(?P<cmd>.*)'.format(_assign_pat), re.VERBOSE)
assign_magic_template = '%s = get_ipython().run_line_magic(%r, %r)'
@StatelessInputTransformer.wrap
def assign_from_magic(line):
    """Transform assignment from magic commands (e.g. a = %who_ls)"""
    match = assign_magic_re.match(line)
    if match is None:
        return line
    lhs, cmd = match.group('lhs', 'cmd')
    # run_line_magic wants the magic name and its argument string separately.
    name, _, args = cmd.partition(' ')
    return assign_magic_template % (lhs, name.lstrip(ESC_MAGIC), args)
@@ -1,158 +1,158 b''
1 1 """Implementation of configuration-related magic functions.
2 2 """
3 3 #-----------------------------------------------------------------------------
4 4 # Copyright (c) 2012 The IPython Development Team.
5 5 #
6 6 # Distributed under the terms of the Modified BSD License.
7 7 #
8 8 # The full license is in the file COPYING.txt, distributed with this software.
9 9 #-----------------------------------------------------------------------------
10 10
11 11 #-----------------------------------------------------------------------------
12 12 # Imports
13 13 #-----------------------------------------------------------------------------
14 14
15 15 # Stdlib
16 16 import re
17 17
18 18 # Our own packages
19 19 from IPython.core.error import UsageError
20 20 from IPython.core.magic import Magics, magics_class, line_magic
21 21 from logging import error
22 22
23 23 #-----------------------------------------------------------------------------
24 24 # Magic implementation classes
25 25 #-----------------------------------------------------------------------------
26 26
# Matches "Class.trait"-style access to a single configurable attribute.
reg = re.compile(r'^\w+\.\w+$')
@magics_class
class ConfigMagics(Magics):

    def __init__(self, shell):
        super(ConfigMagics, self).__init__(shell)
        # Configurable instances registered with this magic (currently unused
        # beyond initialisation here).
        self.configurables = []

    @line_magic
    def config(self, s):
        """configure IPython

            %config Class[.trait=value]

        This magic exposes most of the IPython config system. Any
        Configurable class should be able to be configured with the simple
        line::

            %config Class.trait=value

        Where `value` will be resolved in the user's namespace, if it is an
        expression or variable name.

        Examples
        --------

        To see what classes are available for config, pass no arguments::

            In [1]: %config
            Available objects for config:
                TerminalInteractiveShell
                HistoryManager
                PrefilterManager
                AliasManager
                IPCompleter
                DisplayFormatter

        To view what is configurable on a given class, just pass the class
        name::

            In [2]: %config IPCompleter
            IPCompleter options
            -----------------
            IPCompleter.omit__names=<Enum>
                Current: 2
                Choices: (0, 1, 2)
                Instruct the completer to omit private method names
                Specifically, when completing on ``object.<tab>``.
                When 2 [default]: all names that start with '_' will be excluded.
                When 1: all 'magic' names (``__foo__``) will be excluded.
                When 0: nothing will be excluded.
            IPCompleter.merge_completions=<CBool>
                Current: True
                Whether to merge completion results into a single list
                If False, only the completion results from the first non-empty
                completer will be returned.
            IPCompleter.limit_to__all__=<CBool>
                Current: False
                Instruct the completer to use __all__ for the completion
                Specifically, when completing on ``object.<tab>``.
                When True: only those names in obj.__all__ will be included.
                When False [default]: the __all__ attribute is ignored
            IPCompleter.greedy=<CBool>
                Current: False
                Activate greedy completion
                This will enable completion on elements of lists, results of
                function calls, etc., but can be unsafe because the code is
                actually evaluated on TAB.

        but the real use is in setting values::

            In [3]: %config IPCompleter.greedy = True

        and these values are read from the user_ns if they are variables::

            In [4]: feeling_greedy=False

            In [5]: %config IPCompleter.greedy = feeling_greedy

        """
        from traitlets.config.loader import Config
        # some IPython objects are Configurable, but do not yet have
        # any configurable traits. Exclude them from the effects of
        # this magic, as their presence is just noise:
        configurables = sorted(set([ c for c in self.shell.configurables
                                     if c.__class__.class_traits(config=True)
                                     ]), key=lambda x: x.__class__.__name__)
        classnames = [ c.__class__.__name__ for c in configurables ]

        line = s.strip()
        if not line:
            # print available configurable names
            print("Available objects for config:")
            for name in classnames:
                print(" ", name)
            return
        elif line in classnames:
            # `%config TerminalInteractiveShell` will print trait info for
            # TerminalInteractiveShell
            c = configurables[classnames.index(line)]
            cls = c.__class__
            help = cls.class_get_help(c)
            # strip leading '--' from cl-args:
            help = re.sub(re.compile(r'^--', re.MULTILINE), '', help)
            print(help)
            return
        elif reg.match(line):
            # `%config Class.trait`: return the current value of that trait.
            cls, attr = line.split('.')
            return getattr(configurables[classnames.index(cls)],attr)
        elif '=' not in line:
            msg = "Invalid config statement: %r, "\
                  "should be `Class.trait = value`."

            # Suggest the correctly-cased class name if only the case differs.
            ll = line.lower()
            for classname in classnames:
                if ll == classname.lower():
                    msg = msg + '\nDid you mean %s (note the case)?' % classname
                    break

            raise UsageError( msg % line)

        # otherwise, assume we are setting configurables.
        # leave quotes on args when splitting, because we want
        # unquoted args to eval in user_ns
        cfg = Config()
        # NOTE: exec of user-supplied text is deliberate here - the magic's
        # contract is that the RHS is evaluated in the user's namespace.
        exec("cfg."+line, locals(), self.shell.user_ns)

        for configurable in configurables:
            try:
                configurable.update_config(cfg)
            except Exception as e:
                error(e)
@@ -1,137 +1,137 b''
1 1 # encoding: utf-8
2 2 """
3 3 Simple utility for splitting user input. This is used by both inputsplitter and
4 4 prefilter.
5 5
6 6 Authors:
7 7
8 8 * Brian Granger
9 9 * Fernando Perez
10 10 """
11 11
12 12 #-----------------------------------------------------------------------------
13 13 # Copyright (C) 2008-2011 The IPython Development Team
14 14 #
15 15 # Distributed under the terms of the BSD License. The full license is in
16 16 # the file COPYING, distributed as part of this software.
17 17 #-----------------------------------------------------------------------------
18 18
19 19 #-----------------------------------------------------------------------------
20 20 # Imports
21 21 #-----------------------------------------------------------------------------
22 22
23 23 import re
24 24 import sys
25 25
26 26 from IPython.utils import py3compat
27 27 from IPython.utils.encoding import get_stream_enc
28 28
29 29 #-----------------------------------------------------------------------------
30 30 # Main function
31 31 #-----------------------------------------------------------------------------
32 32
# RegExp for splitting line contents into pre-char//first word-method//rest.
# For clarity, each group in on one line.

# WARNING: update the regexp if the escapes in interactiveshell are changed, as
# they are hardwired in.

# Although it's not solely driven by the regex, note that:
# ,;/% only trigger if they are the first character on the line
# ! and !! trigger if they are first char(s) *or* follow an indent
# ? triggers as first or last char.

line_split = re.compile(r"""
             ^(\s*)               # any leading space
             ([,;/%]|!!?|\?\??)?  # escape character or characters
             \s*(%{0,2}[\w\.\*]*) # function/method, possibly with leading %
                                  # to correctly treat things like '?%magic'
             (.*?$|$)             # rest of line
             """, re.VERBOSE)
51 51
52 52
def split_user_input(line, pattern=None):
    """Split user input into initial whitespace, escape character, function part
    and the rest.
    """
    # We need to ensure that the rest of this routine deals only with unicode
    encoding = get_stream_enc(sys.stdin, 'utf-8')
    line = py3compat.cast_unicode(line, encoding)

    if pattern is None:
        pattern = line_split
    match = pattern.match(line)
    if match:
        pre, esc, ifun, the_rest = match.groups()
    else:
        # Regex failed: fall back to a crude whitespace split.
        try:
            ifun, the_rest = line.split(None, 1)
        except ValueError:
            ifun, the_rest = line, u''
        pre = re.match(r'^(\s*)(.*)', line).groups()[0]
        esc = ""

    return pre, esc or '', ifun.strip(), the_rest.lstrip()
79 79
80 80
class LineInfo(object):
    """A single line of input and associated info.

    Includes the following as properties:

    line
      The original, raw line

    continue_prompt
      Is this line a continuation in a sequence of multiline input?

    pre
      Any leading whitespace.

    esc
      The escape character(s) in pre or the empty string if there isn't one.
      Note that '!!' and '??' are possible values for esc. Otherwise it will
      always be a single character.

    ifun
      The 'function part', which is basically the maximal initial sequence
      of valid python identifiers and the '.' character. This is what is
      checked for alias and magic transformations, used for auto-calling,
      etc. In contrast to Python identifiers, it may start with "%" and contain
      "*".

    the_rest
      Everything else on the line.
    """
    def __init__(self, line, continue_prompt=False):
        self.line = line
        self.continue_prompt = continue_prompt
        # Decompose the raw line into its four syntactic parts.
        self.pre, self.esc, self.ifun, self.the_rest = split_user_input(line)

        self.pre_char = self.pre.strip()
        if self.pre_char:
            self.pre_whitespace = '' # No whitespace allowed before esc chars
        else:
            self.pre_whitespace = self.pre

    def ofind(self, ip):
        """Do a full, attribute-walking lookup of the ifun in the various
        namespaces for the given IPython InteractiveShell instance.

        Return a dict with keys: {found, obj, ospace, ismagic}

        Note: can cause state changes because of calling getattr, but should
        only be run if autocall is on and if the line hasn't matched any
        other, less dangerous handlers.

        Does cache the results of the call, so can be called multiple times
        without worrying about *further* damaging state.
        """
        return ip._ofind(self.ifun)

    def __str__(self):
        return "LineInfo [%s|%s|%s|%s]" %(self.pre, self.esc, self.ifun, self.the_rest)
@@ -1,438 +1,438 b''
1 1 # encoding: utf-8
2 2 """
3 3 Utilities for path handling.
4 4 """
5 5
6 6 # Copyright (c) IPython Development Team.
7 7 # Distributed under the terms of the Modified BSD License.
8 8
9 9 import os
10 10 import sys
11 11 import errno
12 12 import shutil
13 13 import random
14 14 import glob
15 15 from warnings import warn
16 16
17 17 from IPython.utils.process import system
18 18 from IPython.utils import py3compat
19 19 from IPython.utils.decorators import undoc
20 20
21 21 #-----------------------------------------------------------------------------
22 22 # Code
23 23 #-----------------------------------------------------------------------------
24 24
25 25 fs_encoding = sys.getfilesystemencoding()
26 26
def _writable_dir(path):
    """Return True when `path` is an existing directory writable by the user."""
    if not os.path.isdir(path):
        return False
    return os.access(path, os.W_OK)
30 30
if sys.platform == 'win32':
    def _get_long_path_name(path):
        """Get a long path name (expand ~) on Windows using ctypes.

        Examples
        --------

        >>> get_long_path_name('c:\\docume~1')
        'c:\\\\Documents and Settings'

        """
        try:
            import ctypes
        except ImportError:
            raise ImportError('you need to have ctypes installed for this to work')
        # GetLongPathNameW(lpszShortPath, lpszLongPath, cchBuffer)
        _GetLongPathName = ctypes.windll.kernel32.GetLongPathNameW
        _GetLongPathName.argtypes = [ctypes.c_wchar_p, ctypes.c_wchar_p,
            ctypes.c_uint ]

        # 260 == MAX_PATH on Windows; longer results are not handled here.
        buf = ctypes.create_unicode_buffer(260)
        rv = _GetLongPathName(path, buf, 260)
        if rv == 0 or rv > 260:
            # 0 means the call failed; > 260 means the buffer was too small.
            # In either case fall back to the original path untouched.
            return path
        else:
            return buf.value
else:
    def _get_long_path_name(path):
        """Dummy no-op."""
        return path
60 60
61 61
62 62
def get_long_path_name(path):
    """Expand a path into its long form.

    On Windows this expands any ~ in the paths. On other platforms, it is
    a null operation.
    """
    # Dispatches to the platform-specific implementation selected at import time.
    return _get_long_path_name(path)
70 70
71 71
def unquote_filename(name, win32=(sys.platform=='win32')):
    """On Windows, remove leading and trailing quotes from filenames.

    This function has been deprecated and should not be used any more:
    unquoting is now taken care of by :func:`IPython.utils.process.arg_split`.
    """
    warn("'unquote_filename' is deprecated since IPython 5.0 and should not "
         "be used anymore", DeprecationWarning, stacklevel=2)
    if win32:
        quotes = ("'", '"')
        # Note: the quotes need not match each other; this mirrors the
        # historical behavior of this helper.
        if name.startswith(quotes) and name.endswith(quotes):
            name = name[1:-1]
    return name
84 84
85 85
def compress_user(path):
    """Reverse of :func:`os.path.expanduser`

    Replace the user's home directory prefix of ``path`` with ``~``.

    The prefix is only replaced when it ends exactly at a path-component
    boundary, so a sibling directory such as ``/home/userX`` is left
    untouched when the home directory is ``/home/user`` (the previous
    plain ``startswith`` test mangled such paths).
    """
    home = os.path.expanduser('~')
    if path == home:
        return '~'
    # Require a separator right after the home prefix: this avoids
    # compressing paths that merely share a string prefix with $HOME.
    if path.startswith(home + os.sep):
        return '~' + path[len(home):]
    return path
93 93
def get_py_filename(name, force_win32=None):
    """Return a valid python filename in the current directory.

    If the given name is not a file, it adds '.py' and searches again.
    Raises IOError with an informative message if the file isn't found.
    """
    name = os.path.expanduser(name)
    if force_win32 is not None:
        warn("The 'force_win32' argument to 'get_py_filename' is deprecated "
             "since IPython 5.0 and should not be used anymore",
             DeprecationWarning, stacklevel=2)
    # Retry with a '.py' suffix before giving up.
    if not os.path.isfile(name) and not name.endswith('.py'):
        name += '.py'
    if not os.path.isfile(name):
        raise IOError('File `%r` not found.' % name)
    return name
112 112
113 113
def filefind(filename, path_dirs=None):
    """Find a file by looking through a sequence of paths.

    This iterates through a sequence of paths looking for a file and returns
    the full, absolute path of the first occurrence of the file. If no set of
    path dirs is given, the filename is tested as is, after running through
    :func:`expandvars` and :func:`expanduser`. Thus a simple call::

        filefind('myfile.txt')

    will find the file in the current working dir, but::

        filefind('~/myfile.txt')

    Will find the file in the users home directory. This function does not
    automatically try any paths, such as the cwd or the user's home directory.

    Parameters
    ----------
    filename : str
        The filename to look for.
    path_dirs : str, None or sequence of str
        The sequence of paths to look for the file in. If None, the filename
        need to be absolute or be in the cwd. If a string, the string is
        put into a sequence and the searched. If a sequence, walk through
        each element and join with ``filename``, calling :func:`expandvars`
        and :func:`expanduser` before testing for existence.

    Returns
    -------
    Raises :exc:`IOError` or returns absolute path to file.
    """
    # Quoted paths confuse abspath; strip surrounding quotes first.
    filename = filename.strip('"').strip("'")
    if os.path.isabs(filename) and os.path.isfile(filename):
        # Absolute and existing: no search necessary.
        return filename

    if path_dirs is None:
        search = ("",)
    elif isinstance(path_dirs, str):
        search = (path_dirs,)
    else:
        search = path_dirs

    for directory in search:
        if directory == '.':
            directory = os.getcwd()
        candidate = expand_path(os.path.join(directory, filename))
        if os.path.isfile(candidate):
            return os.path.abspath(candidate)

    raise IOError("File %r does not exist in any of the search paths: %r" %
                  (filename, path_dirs) )
166 166
167 167
class HomeDirError(Exception):
    """Raised when a usable home directory cannot be determined."""
170 170
171 171
def get_home_dir(require_writable=False):
    """Return the 'home' directory, as a unicode string.

    Uses os.path.expanduser('~'), and checks for writability.

    See stdlib docs for how this is determined.
    $HOME is first priority on *ALL* platforms.

    Parameters
    ----------

    require_writable : bool [default: False]
        if True:
            guarantees the return value is a writable directory, otherwise
            raises HomeDirError
        if False:
            The path is resolved, but it is not guaranteed to exist or be writable.
    """
    homedir = os.path.expanduser('~')
    # Next line will make things work even when /home/ is a symlink to
    # /usr/home as it is on FreeBSD, for example
    homedir = os.path.realpath(homedir)

    if not _writable_dir(homedir) and os.name == 'nt':
        # expanduser failed, use the registry to get the 'My Documents' folder.
        try:
            try:
                import winreg as wreg  # Py 3
            except ImportError:
                import _winreg as wreg  # Py 2
            key = wreg.OpenKey(
                wreg.HKEY_CURRENT_USER,
                # Raw string: the key contains "\W", "\C", etc., which must
                # not be interpreted as string escape sequences.
                r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
            )
            homedir = wreg.QueryValueEx(key, 'Personal')[0]
            key.Close()
        except Exception:
            # Best-effort only: fall back to whatever expanduser produced.
            pass

    if (not require_writable) or _writable_dir(homedir):
        return py3compat.cast_unicode(homedir, fs_encoding)
    else:
        raise HomeDirError('%s is not a writable dir, '
                           'set $HOME environment variable to override' % homedir)
217 217
def get_xdg_dir():
    """Return the XDG_CONFIG_HOME, if it is defined and exists, else None.

    This is only for non-OS X posix (Linux,Unix,etc.) systems.
    """
    # Only meaningful on non-darwin posix platforms.
    if os.name != 'posix' or sys.platform == 'darwin':
        return None
    # Fall back to ~/.config when the variable is unset *or* empty.
    xdg = os.environ.get("XDG_CONFIG_HOME", None) or os.path.join(get_home_dir(), '.config')
    if xdg and _writable_dir(xdg):
        return py3compat.cast_unicode(xdg, fs_encoding)
    return None
234 234
235 235
def get_xdg_cache_dir():
    """Return the XDG_CACHE_HOME, if it is defined and exists, else None.

    This is only for non-OS X posix (Linux,Unix,etc.) systems.
    """
    # Only meaningful on non-darwin posix platforms.
    if os.name != 'posix' or sys.platform == 'darwin':
        return None
    # Fall back to ~/.cache when the variable is unset *or* empty.
    xdg = os.environ.get("XDG_CACHE_HOME", None) or os.path.join(get_home_dir(), '.cache')
    if xdg and _writable_dir(xdg):
        return py3compat.cast_unicode(xdg, fs_encoding)
    return None
252 252
253 253
@undoc
def get_ipython_dir():
    # Deprecated shim kept only for backward compatibility.
    warn("get_ipython_dir has moved to the IPython.paths module since IPython 4.0.", stacklevel=2)
    from IPython.paths import get_ipython_dir as _impl
    return _impl()
259 259
@undoc
def get_ipython_cache_dir():
    # Deprecated shim kept only for backward compatibility.
    warn("get_ipython_cache_dir has moved to the IPython.paths module since IPython 4.0.", stacklevel=2)
    from IPython.paths import get_ipython_cache_dir as _impl
    return _impl()
265 265
@undoc
def get_ipython_package_dir():
    # Deprecated shim kept only for backward compatibility.
    warn("get_ipython_package_dir has moved to the IPython.paths module since IPython 4.0.", stacklevel=2)
    from IPython.paths import get_ipython_package_dir as _impl
    return _impl()
271 271
@undoc
def get_ipython_module_path(module_str):
    # Deprecated shim kept only for backward compatibility.
    warn("get_ipython_module_path has moved to the IPython.paths module since IPython 4.0.", stacklevel=2)
    from IPython.paths import get_ipython_module_path as _impl
    return _impl(module_str)
277 277
@undoc
def locate_profile(profile='default'):
    # Deprecated shim kept only for backward compatibility.
    warn("locate_profile has moved to the IPython.paths module since IPython 4.0.", stacklevel=2)
    from IPython.paths import locate_profile as _impl
    return _impl(profile=profile)
283 283
def expand_path(s):
    """Expand $VARS and ~names in a string, like a shell

    :Examples:

       In [2]: os.environ['FOO']='test'

       In [3]: expand_path('variable FOO is $FOO')
       Out[3]: 'variable FOO is test'
    """
    # Subtle Windows hack: in a UNC path such as \\server\share$\%username%,
    # os.path.expandvars would eat the lone "$" (treating it as an empty
    # variable), but that "$" marks a hidden share and must survive.  Shield
    # it behind a placeholder around the expansion.
    windows = os.name == 'nt'
    if windows:
        s = s.replace('$\\', 'IPYTHON_TEMP')
    s = os.path.expanduser(s)
    s = os.path.expandvars(s)
    if windows:
        s = s.replace('IPYTHON_TEMP', '$\\')
    return s
305 305
306 306
def unescape_glob(string):
    """Unescape glob pattern in `string`."""
    specials = '*[]!?'

    def _unescape(part):
        # Drop the backslash in front of each escaped glob metacharacter.
        for ch in specials:
            part = part.replace('\\' + ch, ch)
        return part

    # Literal double backslashes are preserved as single backslashes:
    # split on them, unescape each piece, then rejoin.
    pieces = string.split('\\\\')
    return '\\'.join(_unescape(p) for p in pieces)
314 314
315 315
def shellglob(args):
    """
    Do glob expansion for each element in `args` and return a flattened list.

    Unmatched glob pattern will remain as-is in the returned list.

    """
    # On Windows the backslash is the path separator, so it must not be
    # interpreted as a glob escape; everywhere else, strip escapes from
    # patterns that did not match anything.
    if sys.platform == 'win32':
        unescape = lambda x: x
    else:
        unescape = unescape_glob
    expanded = []
    for pattern in args:
        matches = glob.glob(pattern)
        expanded.extend(matches if matches else [unescape(pattern)])
    return expanded
330 330
331 331
def target_outdated(target, deps):
    """Determine whether a target is out of date.

    target_outdated(target,deps) -> 1/0

    deps: list of filenames which MUST exist.
    target: single filename which may or may not exist.

    If target doesn't exist or is older than any file listed in deps, return
    true, otherwise return false.
    """
    try:
        target_time = os.path.getmtime(target)
    except os.error:
        # Missing target is always outdated.
        return 1
    # Outdated as soon as any dependency is strictly newer.
    newer = any(os.path.getmtime(dep) > target_time for dep in deps)
    return 1 if newer else 0
354 354
355 355
def target_update(target, deps, cmd):
    """Update a target with a given command given a list of dependencies.

    target_update(target,deps,cmd) -> runs cmd if target is outdated.

    This is just a wrapper around target_outdated() which calls the given
    command if target is outdated."""
    outdated = target_outdated(target, deps)
    if outdated:
        system(cmd)
366 366
367 367
# Sentinel errno returned when the platform provides no ``os.link`` at all.
ENOLINK = 1998

def link(src, dst):
    """Hard links ``src`` to ``dst``, returning 0 or errno.

    Note that the special errno ``ENOLINK`` will be returned if ``os.link`` isn't
    supported by the operating system.
    """
    if not hasattr(os, "link"):
        return ENOLINK
    try:
        os.link(src, dst)
    except OSError as e:
        return e.errno
    return 0
385 385
386 386
def link_or_copy(src, dst):
    """Attempts to hardlink ``src`` to ``dst``, copying if the link fails.

    Attempts to maintain the semantics of ``shutil.copy``.

    Because ``os.link`` does not overwrite files, a unique temporary file
    will be used if the target already exists, then that file will be moved
    into place.
    """

    if os.path.isdir(dst):
        # Mirror shutil.copy semantics: a directory target means "copy into it".
        dst = os.path.join(dst, os.path.basename(src))

    link_errno = link(src, dst)
    if link_errno == errno.EEXIST:
        if os.stat(src).st_ino == os.stat(dst).st_ino:
            # dst is already a hard link to the correct file, so we don't need
            # to do anything else. If we try to link and rename the file
            # anyway, we get duplicate files - see http://bugs.python.org/issue21876
            return

        # Link (or copy, recursively) to a unique temporary name first ...
        new_dst = dst + "-temp-%04X" %(random.randint(1, 16**4), )
        try:
            link_or_copy(src, new_dst)
        except:
            # Clean up the temp file before re-raising, best effort.
            try:
                os.remove(new_dst)
            except OSError:
                pass
            raise
        # ... then move it over the existing destination in one step.
        os.rename(new_dst, dst)
    elif link_errno != 0:
        # Either link isn't supported, or the filesystem doesn't support
        # linking, or 'src' and 'dst' are on different filesystems.
        shutil.copy(src, dst)
422 422
def ensure_dir_exists(path, mode=0o755):
    """ensure that a directory exists

    If it doesn't exist, try to create it and protect against a race condition
    if another process is doing the same.

    The default permissions are 755, which differ from os.makedirs default of 777.
    """
    if os.path.exists(path):
        # Something is already there; it must actually be a directory.
        if not os.path.isdir(path):
            raise IOError("%r exists but is not a directory" % path)
        return
    try:
        os.makedirs(path, mode=mode)
    except OSError as e:
        # Another process may have created it between our check and makedirs.
        if e.errno != errno.EEXIST:
            raise
@@ -1,772 +1,772 b''
1 1 # encoding: utf-8
2 2 """
3 3 Utilities for working with strings and text.
4 4
5 5 Inheritance diagram:
6 6
7 7 .. inheritance-diagram:: IPython.utils.text
8 8 :parts: 3
9 9 """
10 10
11 11 import os
12 12 import re
13 13 import sys
14 14 import textwrap
15 15 from string import Formatter
16 16 from pathlib import Path
17 17
18 18 from IPython.utils import py3compat
19 19
# datetime.strftime date format for ipython
if sys.platform == 'win32':
    # Windows strftime does not support the "%-d" (non-padded day) flag,
    # so the day of month may appear zero-padded there.
    date_format = "%B %d, %Y"
else:
    date_format = "%B %-d, %Y"
25 25
class LSString(str):
    """String derivative with a special access attributes.

    These are normal strings, but with the special attributes:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value as whitespace-separated string.
        .p (or .paths): list of path objects (requires path.py package)

    Any values which require transformations are computed only once and
    cached.

    Such strings are very useful to efficiently interact with the shell, which
    typically only understands whitespace-separated options for commands."""

    def get_list(self):
        # Cache the newline-split form on first access.
        try:
            cached = self.__list
        except AttributeError:
            cached = self.__list = self.split('\n')
        return cached

    l = list = property(get_list)

    def get_spstr(self):
        # Cache the single-line, space-joined form on first access.
        try:
            cached = self.__spstr
        except AttributeError:
            cached = self.__spstr = self.replace('\n', ' ')
        return cached

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        # The string itself is already the newline-joined representation.
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        # Cache Path objects for lines naming existing filesystem entries.
        try:
            cached = self.__paths
        except AttributeError:
            cached = self.__paths = [Path(p) for p in self.split('\n')
                                     if os.path.exists(p)]
        return cached

    p = paths = property(get_paths)
73 73
74 74 # FIXME: We need to reimplement type specific displayhook and then add this
75 75 # back as a custom printer. This should also be moved outside utils into the
76 76 # core.
77 77
78 78 # def print_lsstring(arg):
79 79 # """ Prettier (non-repr-like) and more informative printer for LSString """
80 80 # print "LSString (.p, .n, .l, .s available). Value:"
81 81 # print arg
82 82 #
83 83 #
84 84 # print_lsstring = result_display.when_type(LSString)(print_lsstring)
85 85
86 86
class SList(list):
    """List derivative with a special access attributes.

    These are normal lists, but with the special attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of path objects (requires path.py package)

    Any values which require transformations are computed only once and
    cached."""

    def get_list(self):
        # The list form is the object itself; no caching needed.
        return self

    l = list = property(get_list)

    def get_spstr(self):
        # Space-joined form, computed once and cached.
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        # Newline-joined form, computed once and cached.
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        # Path objects for the elements that name existing files, cached.
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [Path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        This is case-insensitive. If prune is true, return all items
        NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Select the whole line, or just one whitespace-separated field.
            if field is None:
                return s
            parts = s.split()
            try:
                tgt = parts[field]
                return tgt
            except IndexError:
                # Lines too short to have the requested field never match.
                return ""

        # A string pattern becomes a case-insensitive regex search;
        # anything else is assumed to be a callable predicate.
        if isinstance(pattern, str):
            pred = lambda x : re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern
        if not prune:
            return SList([el for el in self if pred(match_target(el))])
        else:
            return SList([el for el in self if not pred(match_target(el))])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored.

        Without args, fields() just split()'s the strings.
        """
        if len(fields) == 0:
            # No field selection: return each line split into all its fields.
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []

            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    # Requested field missing on this line: silently skip it.
                    pass
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """

        #decorate, sort, undecorate
        if field is not None:
            # Decoration is the selected field(s) of each line.
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            # Decoration is the whole line itself.
            dsu = [[line, line] for line in self]
        if nums:
            # Replace each decoration with the integer formed by its digits
            # (0 when there are none), so "21" sorts after "3".
            for i in range(len(dsu)):
                numstr = "".join([ch for ch in dsu[i][0] if ch.isdigit()])
                try:
                    n = int(numstr)
                except ValueError:
                    n = 0
                dsu[i][0] = n


        dsu.sort()
        return SList([t[1] for t in dsu])
231 231
232 232
233 233 # FIXME: We need to reimplement type specific displayhook and then add this
234 234 # back as a custom printer. This should also be moved outside utils into the
235 235 # core.
236 236
237 237 # def print_slist(arg):
238 238 # """ Prettier (non-repr-like) and more informative printer for SList """
239 239 # print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
240 240 # if hasattr(arg, 'hideonce') and arg.hideonce:
241 241 # arg.hideonce = False
242 242 # return
243 243 #
244 244 # nlprint(arg) # This was a nested list printer, now removed.
245 245 #
246 246 # print_slist = result_display.when_type(SList)(print_slist)
247 247
248 248
def indent(instr, nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------

    instr : basestring
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------

    str|unicode : string indented by ntabs and nspaces.

    """
    if instr is None:
        return
    ind = '\t' * ntabs + ' ' * nspaces
    # flatten replaces any existing leading whitespace; otherwise the new
    # indent is simply prepended at each line start.
    pattern = r'^\s*' if flatten else r'^'
    outstr = re.sub(re.compile(pattern, re.MULTILINE), ind, instr)
    # A trailing newline would otherwise gain a dangling indent; trim it.
    if outstr.endswith(os.linesep + ind):
        outstr = outstr[:-len(ind)]
    return outstr
286 286
287 287
def list_strings(arg):
    """Always return a list of strings, given a string or list of strings
    as input.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    # A bare string is wrapped; anything else is passed through untouched.
    return [arg] if isinstance(arg, str) else arg
310 310
311 311
def marquee(txt='', width=78, mark='*'):
    """Return the input string centered in a 'marquee'.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        # No text: fill the whole width with the marker pattern.
        return (mark * width)[:width]
    # Marker repetitions on each side, accounting for the two spaces.
    nmark = max((width - len(txt) - 2) // len(mark) // 2, 0)
    border = mark * nmark
    return '{0} {1} {2}'.format(border, txt, border)
335 335
336 336
# Leading-whitespace matcher shared by num_ini_spaces.
ini_spaces_re = re.compile(r'^(\s+)')

def num_ini_spaces(strng):
    """Return the number of initial spaces in a string"""
    m = ini_spaces_re.match(strng)
    # match.end() is exactly the count of leading whitespace characters.
    return m.end() if m else 0
347 347
348 348
def format_screen(strng):
    """Format a string for screen printing.

    This removes some latex-type format codes."""
    # Strip latex-style line continuations: a backslash at end of line.
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
357 357
358 358
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    if text.startswith('\n'):
        # Leading blank line: plain dedent already does the right thing.
        return textwrap.dedent(text)

    head, sep, tail = text.partition('\n')
    if not sep:
        # Single line, nothing special to preserve.
        return textwrap.dedent(text)

    # Keep the first line as-is; dedent only the remainder.
    return head + '\n' + textwrap.dedent(tail)
384 384
385 385
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    Returns
    -------

    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    indent_re = re.compile(r'\n\s+', re.MULTILINE)
    cleaned = dedent(text).strip()
    # split() alternates paragraphs with the blank-line separators that
    # matched; keep every other entry to get just the paragraphs.
    paragraphs = paragraph_re.split(cleaned)[::2]
    wrapped = []
    for para in paragraphs:
        # Indentation that survived dedent is presumed to be deliberate
        # formatting, so only flush paragraphs get re-filled.
        if indent_re.search(para) is None:
            para = textwrap.fill(para, ncols)
        wrapped.append(para)
    return wrapped
410 410
411 411
def long_substr(data):
    """Return the longest common substring in a list of strings.

    Credit: http://stackoverflow.com/questions/2892931/longest-common-substring-from-more-than-two-strings-python
    """
    if len(data) == 1:
        # A single string is trivially its own longest common substring.
        return data[0]
    best = ''
    if len(data) > 1 and data[0]:
        reference = data[0]
        for start in range(len(reference)):
            for size in range(len(reference) - start + 1):
                if size <= len(best):
                    # Only candidates longer than the current best matter.
                    continue
                candidate = reference[start:start + size]
                if all(candidate in item for item in data):
                    best = candidate
    return best
426 426
427 427
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>') , then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    matches = set()
    # Collect the quote prefix of every line; bail out (break) as soon as a
    # line without a prefix is found, which skips the for/else 'else' below
    # and leaves the text unmodified.
    for line in lines:
        prefix = re.match(r'^(\s*>[ >]*)', line)
        if prefix:
            matches.add(prefix.group(1))
        else:
            break
    else:
        # Every line is quoted: strip only the prefix common to all lines,
        # found as the longest common substring of the collected prefixes.
        prefix = long_substr(list(matches))
        if prefix:
            strip = len(prefix)
            text = '\n'.join([ ln[strip:] for ln in lines])
    return text
474 474
def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Parameters
    ----------
    source : str
        Source to remove the ansi from
    """
    # "[0-9;]*" (rather than the previous "(\d|;)+?") also matches the bare
    # reset sequence "\x1b[m", which has no parameters at all; it still only
    # strips SGR ("m"-terminated) sequences, a superset of the old behavior.
    return re.sub(r'\033\[[0-9;]*m', '', source)
485 485
486 486
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Note that this version interprets a : as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name, args, kwargs):
        # Evaluate the field as a Python expression in the kwargs namespace.
        value = eval(name, kwargs)
        return value, name
511 511
512 512 #XXX: As of Python 3.4, the format string parsing no longer splits on a colon
513 513 # inside [], so EvalFormatter can handle slicing. Once we only support 3.4 and
514 514 # above, it should be possible to remove FullEvalFormatter.
515 515
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Any time a format key is not found in the kwargs,
    it will be tried as an expression in the kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """
    # copied from Formatter._vformat with minor changes to allow eval
    # and replace the format_spec code with slicing
    def vformat(self, format_string, args, kwargs):
        """Assemble the output, eval()-ing each replacement field in kwargs."""
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                if format_spec:
                    # override format spec, to allow slicing:
                    # "{a:b}" is rejoined into the expression "a:b" so that
                    # slice syntax like [1:2] survives the Formatter parse.
                    field_name = ':'.join([field_name, format_spec])

                # eval the contents of the field for the object
                # to be formatted
                obj = eval(field_name, kwargs)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result
                result.append(self.format_field(obj, ''))

        # cast_unicode normalizes each piece to text before joining.
        return ''.join(py3compat.cast_unicode(s) for s in result)
570 570
571 571
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """
    # Raw string so \$, \w and \. reach the regex engine as regex escapes
    # (non-raw they are invalid string escapes -> DeprecationWarning).
    # The lookahead skips $names that appear inside single-quoted spans.
    _dollar_pattern_ignore_single_quote = re.compile(r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)")

    def parse(self, fmt_string):
        """Yield ``(literal, field, spec, conversion)`` tuples, additionally
        turning ``$name`` markers found in the literal text into fields."""
        for literal_txt, field_name, format_spec, conversion \
                in Formatter.parse(self, fmt_string):

            # Find $foo patterns in the literal text.
            continue_from = 0
            txt = ""
            for m in self._dollar_pattern_ignore_single_quote.finditer(literal_txt):
                new_txt, new_field = m.group(1, 2)
                # $$foo is an escape: keep it as literal text "$foo".
                if new_field.startswith("$"):
                    txt += new_txt + new_field
                else:
                    yield (txt + new_txt, new_field, "", None)
                    txt = ""
                continue_from = m.end()

            # Re-yield the {foo} style pattern
            yield (txt + literal_txt[continue_from:], field_name, format_spec, conversion)
611 611
612 612 #-----------------------------------------------------------------------------
613 613 # Utils to columnize a list of string
614 614 #-----------------------------------------------------------------------------
615 615
616 616 def _col_chunks(l, max_rows, row_first=False):
617 617 """Yield successive max_rows-sized column chunks from l."""
618 618 if row_first:
619 619 ncols = (len(l) // max_rows) + (len(l) % max_rows > 0)
620 620 for i in range(ncols):
621 621 yield [l[j] for j in range(i, len(l), ncols)]
622 622 else:
623 623 for i in range(0, len(l), max_rows):
624 624 yield l[i:(i + max_rows)]
625 625
626 626
627 627 def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80):
628 628 """Calculate optimal info to columnize a list of string"""
629 629 for max_rows in range(1, len(rlist) + 1):
630 630 col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first)))
631 631 sumlength = sum(col_widths)
632 632 ncols = len(col_widths)
633 633 if sumlength + separator_size * (ncols - 1) <= displaywidth:
634 634 break
635 635 return {'num_columns': ncols,
636 636 'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0,
637 637 'max_rows': max_rows,
638 638 'column_widths': col_widths
639 639 }
640 640
641 641
642 642 def _get_or_default(mylist, i, default=None):
643 643 """return list item number, or default if don't exist"""
644 644 if i >= len(mylist):
645 645 return default
646 646 else :
647 647 return mylist[i]
648 648
649 649
def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs):
    """Returns a nested list, and info to columnize items

    Parameters
    ----------
    items
        list of strings to columnize
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between each column.
    displaywidth : int (default=80)
        The width of the area onto which the columns should enter

    Returns
    -------
    strings_matrix
        nested list of strings; the outer list holds the rows, each inner
        list holds one entry per column.  Positions past the end of
        ``items`` are filled with ``empty``.
    dict_info
        layout dict as produced by ``_find_optimal``: ``num_columns``,
        ``max_rows``, ``column_widths`` and ``optimal_separator_width``.

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
        In [2]: list, info = compute_item_matrix(l, displaywidth=12)
        In [3]: list
        Out[3]: [['aaa', 'f', 'k'], ['b', 'g', 'l'], ['cc', 'h', None], ['d', 'i', None], ['eeeee', 'j', None]]
        In [4]: ideal = {'num_columns': 3, 'column_widths': [5, 1, 1], 'optimal_separator_width': 2, 'max_rows': 5}
        In [5]: all((info[k] == ideal[k] for k in ideal.keys()))
        Out[5]: True
    """
    info = _find_optimal(list(map(len, items)), row_first, *args, **kwargs)
    nrow, ncol = info['max_rows'], info['num_columns']
    # Map a (row, col) cell to its index in the flat ``items`` list; the two
    # layouts differ only in which axis varies fastest.
    if row_first:
        flat_index = lambda r, c: r * ncol + c
    else:
        flat_index = lambda r, c: c * nrow + r
    matrix = [[_get_or_default(items, flat_index(r, c), default=empty)
               for c in range(ncol)]
              for r in range(nrow)]
    return (matrix, info)
708 708
709 709
def columnize(items, row_first=False, separator='  ', displaywidth=80, spread=False):
    """ Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.

    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).

    separator : str, optional [default is two spaces]
        The string that separates columns.

    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.

    spread : bool, optional [default is False]
        If True, pad the separator so the columns spread out to fill
        ``displaywidth``.

    Returns
    -------
    The formatted string.
    """
    if not items:
        return '\n'
    matrix, info = compute_item_matrix(items, row_first=row_first,
                                       separator_size=len(separator),
                                       displaywidth=displaywidth)
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))
    widths = info['column_widths']
    lines = []
    for row in matrix:
        # Drop empty/None fill cells (same effect as filter(None, row)).
        cells = [cell for cell in row if cell]
        lines.append(separator.join(
            cell.ljust(w, ' ') for cell, w in zip(cells, widths)))
    return '\n'.join(lines) + '\n'
740 740
741 741
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    if not list_:
        return ''
    if wrap_item_with:
        # Surround every item with the wrapper string, e.g. `a`.
        list_ = ['%s%s%s' % (wrap_item_with, item, wrap_item_with)
                 for item in list_]
    if len(list_) == 1:
        return list_[0]
    # Join all but the last item with ``sep``, then attach the final item
    # with the (usually different) ``last_sep``.
    head = sep.join(i for i in list_[:-1])
    return '%s%s%s' % (head, last_sep, list_[-1])
@@ -1,590 +1,590 b''
1 1 """Patched version of standard library tokenize, to deal with various bugs.
2 2
3 3 Based on Python 3.2 code.
4 4
5 5 Patches:
6 6
7 7 - Gareth Rees' patch for Python issue #12691 (untokenizing)
8 8 - Except we don't encode the output of untokenize
9 9 - Python 2 compatible syntax, so that it can be byte-compiled at installation
10 10 - Newlines in comments and blank lines should be either NL or NEWLINE, depending
11 11 on whether they are in a multi-line statement. Filed as Python issue #17061.
12 12 - Export generate_tokens & TokenError
13 13 - u and rb literals are allowed under Python 3.3 and above.
14 14
15 15 ------------------------------------------------------------------------------
16 16
17 17 Tokenization help for Python programs.
18 18
19 19 tokenize(readline) is a generator that breaks a stream of bytes into
20 20 Python tokens. It decodes the bytes according to PEP-0263 for
21 21 determining source file encoding.
22 22
23 23 It accepts a readline-like method which is called repeatedly to get the
24 24 next line of input (or b"" for EOF). It generates 5-tuples with these
25 25 members:
26 26
27 27 the token type (see token.py)
28 28 the token (a string)
29 29 the starting (row, column) indices of the token (a 2-tuple of ints)
30 30 the ending (row, column) indices of the token (a 2-tuple of ints)
31 31 the original line (string)
32 32
33 33 It is designed to match the working of the Python tokenizer exactly, except
34 34 that it produces COMMENT tokens for comments and gives type OP for all
35 35 operators. Additionally, all token lists start with an ENCODING token
36 36 which tells you which encoding was used to decode the bytes stream.
37 37 """
38 38
39 39 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
40 40 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
41 41 'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
42 42 'Michael Foord')
43 43 import builtins
44 44 import re
45 45 import sys
46 46 from token import *
47 47 from codecs import lookup, BOM_UTF8
48 48 import collections
49 49 from io import TextIOWrapper
# PEP-263 coding-cookie matcher.  Raw string so "\s" and "\w" reach the
# regex engine intact instead of being (invalid) string escapes.
cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
51 51
52 52 import token
53 53 __all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
54 54 "NL", "untokenize", "ENCODING", "TokenInfo"]
55 55 del token
56 56
57 57 __all__ += ["generate_tokens", "TokenError"]
58 58
59 59 COMMENT = N_TOKENS
60 60 tok_name[COMMENT] = 'COMMENT'
61 61 NL = N_TOKENS + 1
62 62 tok_name[NL] = 'NL'
63 63 ENCODING = N_TOKENS + 2
64 64 tok_name[ENCODING] = 'ENCODING'
65 65 N_TOKENS += 3
66 66
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
    """A 5-tuple ``(type, string, start, end, line)`` describing one token."""

    def __repr__(self):
        # Show the numeric token type together with its symbolic name,
        # e.g. ``1 (NAME)``.
        labelled_type = '%d (%s)' % (self.type, tok_name[self.type])
        return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)'
                % self._replace(type=labelled_type))
72 72
def group(*choices):
    """Join the alternatives into one parenthesised regex group: (a|b|c)."""
    return '(' + '|'.join(choices) + ')'

def any(*choices):
    """A group repeated zero or more times.  (Shadows builtins.any here.)"""
    return group(*choices) + '*'

def maybe(*choices):
    """An optional group (zero or one occurrence)."""
    return group(*choices) + '?'
76 76
# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'\w+'

# Integer literal fragments (ASCII digits only, per the note above).
Hexnumber = r'0[xX][0-9a-fA-F]+'
Binnumber = r'0[bB][01]+'
Octnumber = r'0[oO][0-7]+'
Decnumber = r'(?:0+|[1-9][0-9]*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?[0-9]+'
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
Expfloat = r'[0-9]+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
# Optional b/r/u string prefixes (and their rb combinations/cases).
StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# Single-line ' or " string.
String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
                 r"//=?", r"->",
                 r"[+\-*/%&|^=<>]=?",
                 r"~")

Bracket = '[][(){}]'
Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
132 132
133 133 def _compile(expr):
134 134 return re.compile(expr, re.UNICODE)
135 135
# Pre-compiled matchers for whole tokens and for string tails.
tokenprog, pseudoprog, single3prog, double3prog = map(
    _compile, (Token, PseudoToken, Single3, Double3))
# Maps an opening quote (with optional prefix) to the matcher for the rest
# of the string; prefix-only keys map to None (resolved via token[1]/[2]).
endprogs = {"'": _compile(Single), '"': _compile(Double),
            "'''": single3prog, '"""': double3prog,
            "r'''": single3prog, 'r"""': double3prog,
            "b'''": single3prog, 'b"""': double3prog,
            "R'''": single3prog, 'R"""': double3prog,
            "B'''": single3prog, 'B"""': double3prog,
            "br'''": single3prog, 'br"""': double3prog,
            "bR'''": single3prog, 'bR"""': double3prog,
            "Br'''": single3prog, 'Br"""': double3prog,
            "BR'''": single3prog, 'BR"""': double3prog,
            'r': None, 'R': None, 'b': None, 'B': None}

# Recognized opening sequences for triple-quoted strings.
triple_quoted = {}
for t in ("'''", '"""',
          "r'''", 'r"""', "R'''", 'R"""',
          "b'''", 'b"""', "B'''", 'B"""',
          "br'''", 'br"""', "Br'''", 'Br"""',
          "bR'''", 'bR"""', "BR'''", 'BR"""'):
    triple_quoted[t] = t
# Recognized opening sequences for single-quoted strings.
single_quoted = {}
for t in ("'", '"',
          "r'", 'r"', "R'", 'R"',
          "b'", 'b"', "B'", 'B"',
          "br'", 'br"', "Br'", 'Br"',
          "bR'", 'bR"', "BR'", 'BR"' ):
    single_quoted[t] = t

# Also accept u/U and rb-style prefixes (Python 3.3+ literal syntax).
for _prefix in ['rb', 'rB', 'Rb', 'RB', 'u', 'U']:
    _t2 = _prefix+'"""'
    endprogs[_t2] = double3prog
    triple_quoted[_t2] = _t2
    _t1 = _prefix + "'''"
    endprogs[_t1] = single3prog
    triple_quoted[_t1] = _t1
    single_quoted[_prefix+'"'] = _prefix+'"'
    single_quoted[_prefix+"'"] = _prefix+"'"
del _prefix, _t2, _t1
endprogs['u'] = None
endprogs['U'] = None

# Drop the helper so it does not leak into the module namespace.
del _compile
179 179
# Tab stop width used when measuring indentation columns.
tabsize = 8

# Raised for unterminated constructs, e.g. EOF inside a multi-line string
# or multi-line statement.
class TokenError(Exception): pass

# NOTE(review): not raised anywhere in this module; apparently kept for
# compatibility with the stdlib tokenize API -- verify before removing.
class StopTokenizing(Exception): pass
185 185
186 186
class Untokenizer:
    """Rebuild source text from a token stream.

    Accumulates output fragments in ``self.tokens`` while tracking the
    position (``prev_row``/``prev_col``) of the last emitted token so that
    whitespace can be reconstructed.
    """

    def __init__(self):
        self.tokens = []         # output fragments, joined at the end
        self.prev_row = 1        # row of the end of the last emitted token
        self.prev_col = 0        # column of the end of the last emitted token
        self.encoding = 'utf-8'  # updated when an ENCODING token is seen

    def add_whitespace(self, tok_type, start):
        # Pad with spaces so the next token lands at *start* (row, col).
        row, col = start
        assert row >= self.prev_row
        col_offset = col - self.prev_col
        if col_offset > 0:
            self.tokens.append(" " * col_offset)
        elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
            # Line was backslash-continued.
            self.tokens.append(" ")

    def untokenize(self, tokens):
        # Full 5-tuple mode; falls back to compat() when the stream only
        # carries (type, string) 2-tuples.
        iterable = iter(tokens)
        for t in iterable:
            if len(t) == 2:
                self.compat(t, iterable)
                break
            tok_type, token, start, end = t[:4]
            if tok_type == ENCODING:
                self.encoding = token
                continue
            self.add_whitespace(tok_type, start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        # Positionless mode: re-insert spacing heuristically (after names
        # and numbers, between adjacent strings) and replay INDENT/DEDENT.
        # This import is here to avoid problems when the itertools
        # module is not built yet and tokenize is imported.
        from itertools import chain
        startline = False
        prevstring = False
        indents = []
        toks_append = self.tokens.append

        for tok in chain([token], iterable):
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)
261 261
262 262
def untokenize(tokens):
    """
    Convert ``tokens`` (an iterable) back into Python source code. Return
    a bytes object, encoded using the encoding specified by the last
    ENCODING token in ``tokens``, or UTF-8 if no ENCODING token is found.

    The result is guaranteed to tokenize back to match the input so that
    the conversion is lossless and round-trips are assured.  The
    guarantee applies only to the token type and token string as the
    spacing between tokens (column positions) may change.

    :func:`untokenize` has two modes. If the input tokens are sequences
    of length 2 (``type``, ``string``) then spaces are added as necessary to
    preserve the round-trip property.

    If the input tokens are sequences of length 4 or more (``type``,
    ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
    spaces are added so that each token appears in the result at the
    position indicated by ``start`` and ``end``, if possible.
    """
    # Delegate to a fresh Untokenizer so repeated calls share no state.
    converter = Untokenizer()
    return converter.untokenize(tokens)
284 284
285 285
286 286 def _get_normal_name(orig_enc):
287 287 """Imitates get_normal_name in tokenizer.c."""
288 288 # Only care about the first 12 characters.
289 289 enc = orig_enc[:12].lower().replace("_", "-")
290 290 if enc == "utf-8" or enc.startswith("utf-8-"):
291 291 return "utf-8"
292 292 if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
293 293 enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
294 294 return "iso-8859-1"
295 295 return orig_enc
296 296
def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263. If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
    invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
    'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    bom_found = False
    encoding = None
    default = 'utf-8'
    def read_or_stop():
        # Treat StopIteration from the readline callable as EOF.
        try:
            return readline()
        except StopIteration:
            return b''

    def find_cookie(line):
        # Return the normalised encoding named in a PEP-263 cookie on
        # *line*, or None if the line carries no cookie.
        try:
            # Decode as UTF-8. Either the line is an encoding declaration,
            # in which case it should be pure ASCII, or it must be UTF-8
            # per default encoding.
            line_string = line.decode('utf-8')
        except UnicodeDecodeError:
            raise SyntaxError("invalid or missing encoding declaration")

        matches = cookie_re.findall(line_string)
        if not matches:
            return None
        encoding = _get_normal_name(matches[0])
        try:
            codec = lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            raise SyntaxError("unknown encoding: " + encoding)

        if bom_found:
            # A BOM constrains the cookie: only utf-8 is consistent.
            if encoding != 'utf-8':
                # This behaviour mimics the Python interpreter
                raise SyntaxError('encoding problem: utf-8')
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default, []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]

    # PEP 263 allows the cookie on line 1 or 2 only.
    second = read_or_stop()
    if not second:
        return default, [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]

    return default, [first, second]
370 370
371 371
def open(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    # This function shadows the builtin, hence the explicit builtins.open.
    raw = builtins.open(filename, 'rb')
    encoding, _consumed = detect_encoding(raw.readline)
    # Rewind: detect_encoding consumed up to two lines of the raw stream.
    raw.seek(0)
    stream = TextIOWrapper(raw, encoding, line_buffering=True)
    # Present a text-mode file object to callers that inspect .mode.
    stream.mode = 'r'
    return stream
382 382
383 383
def tokenize(readline):
    """
    The tokenize() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as bytes. Alternately, readline
    can be a callable function terminating with :class:`StopIteration`::

        readline = open(myfile, 'rb').__next__  # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    logical line; continuation lines are included.

    The first token sequence will always be an ENCODING token
    which tells you which encoding was used to decode the bytes stream.
    """
    # This import is here to avoid problems when the itertools module is not
    # built yet and tokenize is imported.
    from itertools import chain, repeat
    encoding, buffered = detect_encoding(readline)
    # Replay the lines detect_encoding consumed, then the rest of the
    # stream, then an endless tail of b"" so _tokenize sees a clean EOF.
    remaining = iter(readline, b"")
    padding = repeat(b"")
    lines = chain(buffered, remaining, padding)
    return _tokenize(lines.__next__, encoding)
411 411
412 412
def _tokenize(readline, encoding):
    """Core tokenizer loop shared by tokenize() and generate_tokens().

    Yields TokenInfo 5-tuples.  If *encoding* is not None, lines returned
    by *readline* are bytes and are decoded with it (and an ENCODING token
    is emitted first); otherwise lines are assumed to be str already.
    """
    lnum = parenlev = continued = 0
    numchars = '0123456789'
    contstr, needcont = '', 0        # state for a string spanning lines
    contline = None
    indents = [0]                    # stack of indentation columns

    if encoding is not None:
        if encoding == "utf-8-sig":
            # BOM will already have been stripped.
            encoding = "utf-8"
        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
    while True:             # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = b''

        if encoding is not None:
            line = line.decode(encoding)
        lnum += 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield TokenInfo(STRING, contstr + line[:end],
                                strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                # Single-quoted string not continued with a backslash.
                yield TokenInfo(ERRORTOKEN, contstr + line,
                                strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield TokenInfo(COMMENT, comment_token,
                                    (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield TokenInfo(NEWLINE, line[nl_pos:],
                                    (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield TokenInfo(NEWLINE, line[pos:],
                                    (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

                if (initial in numchars or                 # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                    yield TokenInfo(NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    # Inside brackets a newline is non-logical (NL).
                    yield TokenInfo(NL if parenlev > 0 else NEWLINE,
                                    token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield TokenInfo(COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield TokenInfo(STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        # Resolve the tail matcher from the quote character
                        # (the prefix-only endprogs entries are None).
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield TokenInfo(STRING, token, spos, epos, line)
                elif initial.isidentifier():               # ordinary name
                    yield TokenInfo(NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield TokenInfo(OP, token, spos, epos, line)
            else:
                yield TokenInfo(ERRORTOKEN, line[pos],
                                (lnum, pos), (lnum, pos+1), line)
                pos += 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
560 560
561 561
562 562 # An undocumented, backwards compatible, API for all the places in the standard
563 563 # library that expect to be able to use tokenize with strings
def generate_tokens(readline):
    """Tokenize already-decoded text.

    *readline* should return str lines; no ENCODING token is produced
    because the encoding passed to _tokenize is None.
    """
    return _tokenize(readline, None)
566 566
if __name__ == "__main__":
    # Quick sanity check
    s = b'''def parseline(self, line):
        """Parse the line into a command name and a string containing
        the arguments. Returns a tuple containing (command, args, line).
        'command' and 'args' may be None if the line couldn't be parsed.
        """
        line = line.strip()
        if not line:
            return None, None, line
        elif line[0] == '?':
            line = 'help ' + line[1:]
        elif line[0] == '!':
            if hasattr(self, 'do_shell'):
                line = 'shell ' + line[1:]
            else:
                return None, None, line
        i, n = 0, len(line)
        while i < n and line[i] in self.identchars: i = i+1
        cmd, arg = line[:i], line[i:].strip()
        return cmd, arg, line
    '''
    # Tokens print with symbolic type names via TokenInfo.__repr__.
    for tok in tokenize(iter(s.splitlines()).__next__):
        print(tok)
General Comments 0
You need to be logged in to leave comments. Login now