remove sys_version for Python 3...
Paul Ivanov -
@@ -604,7 +604,6 @@ class Pdb(OldPdb, object):
604 604 ('Globals', self.curframe.f_globals)]
605 605 self.shell.find_line_magic('psource')(arg, namespaces=namespaces)
606 606
607 if sys.version_info > (3, ):
608 607 def do_where(self, arg):
609 608 """w(here)
610 609 Print a stack trace, with the most recent frame at the bottom.
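The guard removed above is the pattern this whole commit targets: a sys.version_info comparison that can no longer be false. As a hedged illustration (plain Python, not part of the diff), both spellings used in the changed files are unconditionally true on Python 3:

    import sys

    # On any Python 3 interpreter both checks are True, so code guarded by them
    # can simply be dedented and kept unconditionally.
    print(sys.version_info > (3,))      # True
    print(sys.version_info[0] >= 3)     # True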
@@ -187,10 +187,7 @@ class ExecutionResult(object):
187 187 raise self.error_in_exec
188 188
189 189 def __repr__(self):
190 if sys.version_info > (3,):
191 190 name = self.__class__.__qualname__
192 else:
193 name = self.__class__.__name__
194 191 return '<%s object at %x, execution_count=%s error_before_exec=%s error_in_exec=%s result=%s>' %\
195 192 (name, id(self), self.execution_count, self.error_before_exec, self.error_in_exec, repr(self.result))
196 193
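The __repr__ hunk drops the Python 2 fallback to __name__. As a hedged aside (standard Python 3 behaviour, not IPython code), every class carries __qualname__, and it also records nesting, so no fallback is needed:

    class Outer:
        class Inner:
            pass

    # __qualname__ is always present on Python 3 and includes the nesting path.
    print(Outer.Inner.__name__)       # 'Inner'
    print(Outer.Inner.__qualname__)   # 'Outer.Inner'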
@@ -155,7 +155,6 @@ def test_latex_completions():
155 155
156 156
157 157
158 @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
159 158 def test_back_latex_completion():
160 159 ip = get_ipython()
161 160
@@ -164,7 +163,6 @@ def test_back_latex_completion():
164 163 nt.assert_equal(len(matches), 1)
165 164 nt.assert_equal(matches[0], '\\beta')
166 165
167 @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
168 166 def test_back_unicode_completion():
169 167 ip = get_ipython()
170 168
@@ -173,7 +171,6 @@ def test_back_unicode_completion():
173 171 nt.assert_equal(matches[0], '\\ROMAN NUMERAL FIVE')
174 172
175 173
176 @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
177 174 def test_forward_unicode_completion():
178 175 ip = get_ipython()
179 176
@@ -181,7 +178,6 @@ def test_forward_unicode_completion():
181 178 nt.assert_equal(len(matches), 1)
182 179 nt.assert_equal(matches[0], 'Ⅴ')
183 180
184 @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
185 181 @dec.knownfailureif(sys.platform == 'win32', 'Fails if there is a C:\\j... path')
186 182 def test_no_ascii_back_completion():
187 183 ip = get_ipython()
@@ -588,7 +584,6 @@ def test_dict_key_completion_contexts():
588 584
589 585
590 586
591 @dec.onlyif(sys.version_info[0] >= 3, 'This test only applies in Py>=3')
592 587 def test_dict_key_completion_bytes():
593 588 """Test handling of bytes in dict key completion"""
594 589 ip = get_ipython()
@@ -618,7 +613,6 @@ def test_dict_key_completion_bytes():
618 613 nt.assert_not_in("abd", matches)
619 614
620 615
621 @dec.onlyif(sys.version_info[0] < 3, 'This test only applies in Py<3')
622 616 def test_dict_key_completion_unicode_py2():
623 617 """Test handling of unicode in dict key completion"""
624 618 ip = get_ipython()
@@ -679,7 +673,6 @@ def test_dict_key_completion_unicode_py2():
679 673 nt.assert_in(u"d[u'a\u05d0b']", matches)
680 674
681 675
682 @dec.onlyif(sys.version_info[0] >= 3, 'This test only applies in Py>=3')
683 676 def test_dict_key_completion_unicode_py3():
684 677 """Test handling of unicode in dict key completion"""
685 678 ip = get_ipython()
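Each @dec.onlyif(sys.version_info[0] >= 3, ...) marker deleted above is a no-op once the suite only runs on Python 3, so the decorators are removed outright rather than rewritten. A hedged sketch of the equivalent standard-library pattern (unittest.skipIf, not the IPython decorator itself):

    import sys
    import unittest

    class CompletionTests(unittest.TestCase):
        # A version-gated skip is dead weight when the condition can never hold.
        @unittest.skipIf(sys.version_info[0] < 3, "requires Python 3")
        def test_unicode_completion(self):
            self.assertEqual("\N{ROMAN NUMERAL FIVE}", "Ⅴ")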
@@ -338,19 +338,6 @@ class InteractiveShellTestCase(unittest.TestCase):
338 338 finally:
339 339 trap.hook = save_hook
340 340
341 @skipif(sys.version_info[0] >= 3, "softspace removed in py3")
342 def test_print_softspace(self):
343 """Verify that softspace is handled correctly when executing multiple
344 statements.
345
346 In [1]: print 1; print 2
347 1
348 2
349
350 In [2]: print 1,; print 2
351 1 2
352 """
353
354 341 def test_ofind_line_magic(self):
355 342 from IPython.core.magic import register_line_magic
356 343
@@ -466,22 +453,6 @@ class InteractiveShellTestCase(unittest.TestCase):
466 453 # Reset the custom exception hook
467 454 ip.set_custom_exc((), None)
468 455
469 @skipif(sys.version_info[0] >= 3, "no differences with __future__ in py3")
470 def test_future_environment(self):
471 "Can we run code with & without the shell's __future__ imports?"
472 ip.run_cell("from __future__ import division")
473 ip.run_cell("a = 1/2", shell_futures=True)
474 self.assertEqual(ip.user_ns['a'], 0.5)
475 ip.run_cell("b = 1/2", shell_futures=False)
476 self.assertEqual(ip.user_ns['b'], 0)
477
478 ip.compile.reset_compiler_flags()
479 # This shouldn't leak to the shell's compiler
480 ip.run_cell("from __future__ import division \nc=1/2", shell_futures=False)
481 self.assertEqual(ip.user_ns['c'], 0.5)
482 ip.run_cell("d = 1/2", shell_futures=True)
483 self.assertEqual(ip.user_ns['d'], 0)
484
485 456 def test_mktempfile(self):
486 457 filename = ip.mktempfile()
487 458 # Check that we can open the file again on Windows
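The deleted test_future_environment (and the %time/%timeit __future__ tests further down) only made sense while `from __future__ import division` changed the meaning of 1/2. A hedged reminder in plain Python 3 of why there is nothing left to exercise:

    # True division is the default on Python 3 and the __future__ import is a
    # no-op, so shell_futures can no longer change the result of 1/2.
    print(1 / 2)    # 0.5, with or without any __future__ import
    print(1 // 2)   # 0, floor division is always spelled //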
@@ -509,9 +480,6 @@ class InteractiveShellTestCase(unittest.TestCase):
509 480 raise DerivedInterrupt("foo")
510 481 except KeyboardInterrupt:
511 482 msg = ip.get_exception_only()
512 if sys.version_info[0] <= 2:
513 self.assertEqual(msg, 'DerivedInterrupt: foo\n')
514 else:
515 483 self.assertEqual(msg, 'IPython.core.tests.test_interactiveshell.DerivedInterrupt: foo\n')
516 484
517 485 def test_inspect_text(self):
@@ -377,18 +377,6 @@ def test_time3():
377 377 "run = 0\n"
378 378 "run += 1")
379 379
380 @dec.skipif(sys.version_info[0] >= 3, "no differences with __future__ in py3")
381 def test_time_futures():
382 "Test %time with __future__ environments"
383 ip = get_ipython()
384 ip.autocall = 0
385 ip.run_cell("from __future__ import division")
386 with tt.AssertPrints('0.25'):
387 ip.run_line_magic('time', 'print(1/4)')
388 ip.compile.reset_compiler_flags()
389 with tt.AssertNotPrints('0.25'):
390 ip.run_line_magic('time', 'print(1/4)')
391
392 380 def test_doctest_mode():
393 381 "Toggle doctest_mode twice, it should be a no-op and run without error"
394 382 _ip.magic('doctest_mode')
@@ -573,17 +561,6 @@ def test_timeit_return_quiet():
573 561 res = _ip.run_line_magic('timeit', '-n1 -r1 -q -o 1')
574 562 assert (res is not None)
575 563
576 @dec.skipif(sys.version_info[0] >= 3, "no differences with __future__ in py3")
577 def test_timeit_futures():
578 "Test %timeit with __future__ environments"
579 ip = get_ipython()
580 ip.run_cell("from __future__ import division")
581 with tt.AssertPrints('0.25'):
582 ip.run_line_magic('timeit', '-n1 -r1 print(1/4)')
583 ip.compile.reset_compiler_flags()
584 with tt.AssertNotPrints('0.25'):
585 ip.run_line_magic('timeit', '-n1 -r1 print(1/4)')
586
587 564 @dec.skipif(execution.profile is None)
588 565 def test_prun_special_syntax():
589 566 "Test %%prun with IPython special syntax"
@@ -57,8 +57,6 @@ def check_cpaste(code, should_fail=False):
57 57 finally:
58 58 sys.stdin = stdin_save
59 59
60 PY31 = sys.version_info[:2] == (3,1)
61
62 60 def test_cpaste():
63 61 """Test cpaste magic"""
64 62
@@ -77,14 +75,9 @@ def test_cpaste():
77 75 ],
78 76
79 77 'fail': ["1 + runf()",
78 "++ runf()",
80 79 ]}
81 80
82 # I don't know why this is failing specifically on Python 3.1. I've
83 # checked it manually interactively, but we don't care enough about 3.1
84 # to spend time fiddling with the tests, so we just skip it.
85 if not PY31:
86 tests['fail'].append("++ runf()")
87
88 81 ip.user_ns['runf'] = runf
89 82
90 83 for code in tests['pass']:
@@ -275,7 +275,6 @@ def test_info():
275 275 nt.assert_equal(i['type_name'], 'type')
276 276 expted_class = str(type(type)) # <class 'type'> (Python 3) or <type 'type'>
277 277 nt.assert_equal(i['base_class'], expted_class)
278 if sys.version_info > (3,):
279 278 nt.assert_regex(i['string_form'], "<class 'IPython.core.tests.test_oinspect.Call'( at 0x[0-9a-f]{1,9})?>")
280 279 fname = __file__
281 280 if fname.endswith(".pyc"):
@@ -313,7 +313,6 @@ def r3o2():
313 313 #----------------------------------------------------------------------------
314 314
315 315 # module testing (minimal)
316 if sys.version_info > (3,):
317 316 def test_handlers():
318 317 def spam(c, d_e):
319 318 (d, e) = d_e
@@ -45,7 +45,6 @@ pjoin = path.join
45 45
46 46 # Enable printing all warnings raise by IPython's modules
47 47 warnings.filterwarnings('ignore', message='.*Matplotlib is building the font cache.*', category=UserWarning, module='.*')
48 if sys.version_info > (3,0):
49 48 warnings.filterwarnings('error', message='.*', category=ResourceWarning, module='.*')
50 49 warnings.filterwarnings('error', message=".*{'config': True}.*", category=DeprecationWarning, module='IPy.*')
51 50 warnings.filterwarnings('default', message='.*', category=Warning, module='IPy.*')
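The warnings hunk installs the ResourceWarning filter unconditionally; since ResourceWarning only exists on Python 3, the version guard added nothing. A hedged standalone sketch of the same call:

    import warnings

    # Escalate unclosed-resource warnings to errors, as the test harness above
    # now does without a version check.
    warnings.filterwarnings('error', category=ResourceWarning)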
@@ -1,26 +1,13 @@
1 """Decorators marks that a doctest should be skipped, for both python 2 and 3.
1 """Decorators marks that a doctest should be skipped.
2 2
3 3 The IPython.testing.decorators module triggers various extra imports, including
4 4 numpy and sympy if they're present. Since this decorator is used in core parts
5 5 of IPython, it's in a separate module so that running IPython doesn't trigger
6 6 those imports."""
7 7
8 #-----------------------------------------------------------------------------
9 # Copyright (C) 2009-2011 The IPython Development Team
10 #
11 # Distributed under the terms of the BSD License. The full license is in
12 # the file COPYING, distributed as part of this software.
13 #-----------------------------------------------------------------------------
8 # Copyright (C) IPython Development Team
9 # Distributed under the terms of the Modified BSD License.
14 10
15 #-----------------------------------------------------------------------------
16 # Imports
17 #-----------------------------------------------------------------------------
18
19 import sys
20
21 #-----------------------------------------------------------------------------
22 # Decorators
23 #-----------------------------------------------------------------------------
24 11
25 12 def skip_doctest(f):
26 13 """Decorator - mark a function or method for skipping its doctest.
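The skipdoctest hunk shows only the module header and the first line of skip_doctest; the decorator body is outside the diff. A hedged guess at the minimal shape such a marker decorator usually takes (the attribute name is an assumption, not taken from this commit):

    def skip_doctest(f):
        """Mark a function or method so its doctest is not collected."""
        # Hypothetical marker attribute; the real name is not shown in the hunk.
        f.skip_doctest = True
        return f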
@@ -1,9 +1,595 @@
1 """Load our patched versions of tokenize.
1 """Patched version of standard library tokenize, to deal with various bugs.
2
3 Based on Python 3.2 code.
4
5 Patches:
6
7 - Gareth Rees' patch for Python issue #12691 (untokenizing)
8 - Except we don't encode the output of untokenize
9 - Python 2 compatible syntax, so that it can be byte-compiled at installation
10 - Newlines in comments and blank lines should be either NL or NEWLINE, depending
11 on whether they are in a multi-line statement. Filed as Python issue #17061.
12 - Export generate_tokens & TokenError
13 - u and rb literals are allowed under Python 3.3 and above.
14
15 ------------------------------------------------------------------------------
16 Tokenization help for Python programs.
17
18 tokenize(readline) is a generator that breaks a stream of bytes into
19 Python tokens. It decodes the bytes according to PEP-0263 for
20 determining source file encoding.
21
22 It accepts a readline-like method which is called repeatedly to get the
23 next line of input (or b"" for EOF). It generates 5-tuples with these
24 members:
25
26 the token type (see token.py)
27 the token (a string)
28 the starting (row, column) indices of the token (a 2-tuple of ints)
29 the ending (row, column) indices of the token (a 2-tuple of ints)
30 the original line (string)
31
32 It is designed to match the working of the Python tokenizer exactly, except
33 that it produces COMMENT tokens for comments and gives type OP for all
34 operators. Additionally, all token lists start with an ENCODING token
35 which tells you which encoding was used to decode the bytes stream.
2 36 """
37 from __future__ import absolute_import
3 38
39 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
40 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
41 'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
42 'Michael Foord')
43 import builtins
44 import re
4 45 import sys
46 from token import *
47 from codecs import lookup, BOM_UTF8
48 import collections
49 from io import TextIOWrapper
50 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
51
52 import token
53 __all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
54 "NL", "untokenize", "ENCODING", "TokenInfo"]
55 del token
56
57 __all__ += ["generate_tokens", "TokenError"]
58
59 COMMENT = N_TOKENS
60 tok_name[COMMENT] = 'COMMENT'
61 NL = N_TOKENS + 1
62 tok_name[NL] = 'NL'
63 ENCODING = N_TOKENS + 2
64 tok_name[ENCODING] = 'ENCODING'
65 N_TOKENS += 3
66
67 class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
68 def __repr__(self):
69 annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
70 return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
71 self._replace(type=annotated_type))
72
73 def group(*choices): return '(' + '|'.join(choices) + ')'
74 def any(*choices): return group(*choices) + '*'
75 def maybe(*choices): return group(*choices) + '?'
76
77 # Note: we use unicode matching for names ("\w") but ascii matching for
78 # number literals.
79 Whitespace = r'[ \f\t]*'
80 Comment = r'#[^\r\n]*'
81 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
82 Name = r'\w+'
83
84 Hexnumber = r'0[xX][0-9a-fA-F]+'
85 Binnumber = r'0[bB][01]+'
86 Octnumber = r'0[oO][0-7]+'
87 Decnumber = r'(?:0+|[1-9][0-9]*)'
88 Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
89 Exponent = r'[eE][-+]?[0-9]+'
90 Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
91 Expfloat = r'[0-9]+' + Exponent
92 Floatnumber = group(Pointfloat, Expfloat)
93 Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
94 Number = group(Imagnumber, Floatnumber, Intnumber)
95
96 if sys.version_info.minor >= 3:
97 StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'
98 else:
99 StringPrefix = r'(?:[bB]?[rR]?)?'
100
101 # Tail end of ' string.
102 Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
103 # Tail end of " string.
104 Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
105 # Tail end of ''' string.
106 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
107 # Tail end of """ string.
108 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
109 Triple = group(StringPrefix + "'''", StringPrefix + '"""')
110 # Single-line ' or " string.
111 String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
112 StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
113
114 # Because of leftmost-then-longest match semantics, be sure to put the
115 # longest operators first (e.g., if = came before ==, == would get
116 # recognized as two instances of =).
117 Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
118 r"//=?", r"->",
119 r"[+\-*/%&|^=<>]=?",
120 r"~")
121
122 Bracket = '[][(){}]'
123 Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
124 Funny = group(Operator, Bracket, Special)
125
126 PlainToken = group(Number, Funny, String, Name)
127 Token = Ignore + PlainToken
128
129 # First (or only) line of ' or " string.
130 ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
131 group("'", r'\\\r?\n'),
132 StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
133 group('"', r'\\\r?\n'))
134 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
135 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
136
137 def _compile(expr):
138 return re.compile(expr, re.UNICODE)
139
140 tokenprog, pseudoprog, single3prog, double3prog = map(
141 _compile, (Token, PseudoToken, Single3, Double3))
142 endprogs = {"'": _compile(Single), '"': _compile(Double),
143 "'''": single3prog, '"""': double3prog,
144 "r'''": single3prog, 'r"""': double3prog,
145 "b'''": single3prog, 'b"""': double3prog,
146 "R'''": single3prog, 'R"""': double3prog,
147 "B'''": single3prog, 'B"""': double3prog,
148 "br'''": single3prog, 'br"""': double3prog,
149 "bR'''": single3prog, 'bR"""': double3prog,
150 "Br'''": single3prog, 'Br"""': double3prog,
151 "BR'''": single3prog, 'BR"""': double3prog,
152 'r': None, 'R': None, 'b': None, 'B': None}
153
154 triple_quoted = {}
155 for t in ("'''", '"""',
156 "r'''", 'r"""', "R'''", 'R"""',
157 "b'''", 'b"""', "B'''", 'B"""',
158 "br'''", 'br"""', "Br'''", 'Br"""',
159 "bR'''", 'bR"""', "BR'''", 'BR"""'):
160 triple_quoted[t] = t
161 single_quoted = {}
162 for t in ("'", '"',
163 "r'", 'r"', "R'", 'R"',
164 "b'", 'b"', "B'", 'B"',
165 "br'", 'br"', "Br'", 'Br"',
166 "bR'", 'bR"', "BR'", 'BR"' ):
167 single_quoted[t] = t
168
169 if sys.version_info.minor >= 3:
170 # Python 3.3
171 for _prefix in ['rb', 'rB', 'Rb', 'RB', 'u', 'U']:
172 _t2 = _prefix+'"""'
173 endprogs[_t2] = double3prog
174 triple_quoted[_t2] = _t2
175 _t1 = _prefix + "'''"
176 endprogs[_t1] = single3prog
177 triple_quoted[_t1] = _t1
178 single_quoted[_prefix+'"'] = _prefix+'"'
179 single_quoted[_prefix+"'"] = _prefix+"'"
180 del _prefix, _t2, _t1
181 endprogs['u'] = None
182 endprogs['U'] = None
183
184 del _compile
185
186 tabsize = 8
187
188 class TokenError(Exception): pass
189
190 class StopTokenizing(Exception): pass
191
192
193 class Untokenizer:
194
195 def __init__(self):
196 self.tokens = []
197 self.prev_row = 1
198 self.prev_col = 0
199 self.encoding = 'utf-8'
200
201 def add_whitespace(self, tok_type, start):
202 row, col = start
203 assert row >= self.prev_row
204 col_offset = col - self.prev_col
205 if col_offset > 0:
206 self.tokens.append(" " * col_offset)
207 elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
208 # Line was backslash-continued.
209 self.tokens.append(" ")
210
211 def untokenize(self, tokens):
212 iterable = iter(tokens)
213 for t in iterable:
214 if len(t) == 2:
215 self.compat(t, iterable)
216 break
217 tok_type, token, start, end = t[:4]
218 if tok_type == ENCODING:
219 self.encoding = token
220 continue
221 self.add_whitespace(tok_type, start)
222 self.tokens.append(token)
223 self.prev_row, self.prev_col = end
224 if tok_type in (NEWLINE, NL):
225 self.prev_row += 1
226 self.prev_col = 0
227 return "".join(self.tokens)
228
229 def compat(self, token, iterable):
230 # This import is here to avoid problems when the itertools
231 # module is not built yet and tokenize is imported.
232 from itertools import chain
233 startline = False
234 prevstring = False
235 indents = []
236 toks_append = self.tokens.append
237
238 for tok in chain([token], iterable):
239 toknum, tokval = tok[:2]
240 if toknum == ENCODING:
241 self.encoding = tokval
242 continue
243
244 if toknum in (NAME, NUMBER):
245 tokval += ' '
246
247 # Insert a space between two consecutive strings
248 if toknum == STRING:
249 if prevstring:
250 tokval = ' ' + tokval
251 prevstring = True
252 else:
253 prevstring = False
254
255 if toknum == INDENT:
256 indents.append(tokval)
257 continue
258 elif toknum == DEDENT:
259 indents.pop()
260 continue
261 elif toknum in (NEWLINE, NL):
262 startline = True
263 elif startline and indents:
264 toks_append(indents[-1])
265 startline = False
266 toks_append(tokval)
267
268
269 def untokenize(tokens):
270 """
271 Convert ``tokens`` (an iterable) back into Python source code. Return
272 a bytes object, encoded using the encoding specified by the last
273 ENCODING token in ``tokens``, or UTF-8 if no ENCODING token is found.
274
275 The result is guaranteed to tokenize back to match the input so that
276 the conversion is lossless and round-trips are assured. The
277 guarantee applies only to the token type and token string as the
278 spacing between tokens (column positions) may change.
279
280 :func:`untokenize` has two modes. If the input tokens are sequences
281 of length 2 (``type``, ``string``) then spaces are added as necessary to
282 preserve the round-trip property.
283
284 If the input tokens are sequences of length 4 or more (``type``,
285 ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
286 spaces are added so that each token appears in the result at the
287 position indicated by ``start`` and ``end``, if possible.
288 """
289 return Untokenizer().untokenize(tokens)
290
291
292 def _get_normal_name(orig_enc):
293 """Imitates get_normal_name in tokenizer.c."""
294 # Only care about the first 12 characters.
295 enc = orig_enc[:12].lower().replace("_", "-")
296 if enc == "utf-8" or enc.startswith("utf-8-"):
297 return "utf-8"
298 if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
299 enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
300 return "iso-8859-1"
301 return orig_enc
302
303 def detect_encoding(readline):
304 """
305 The detect_encoding() function is used to detect the encoding that should
306 be used to decode a Python source file. It requires one argment, readline,
307 in the same way as the tokenize() generator.
308
309 It will call readline a maximum of twice, and return the encoding used
310 (as a string) and a list of any lines (left as bytes) it has read in.
311
312 It detects the encoding from the presence of a utf-8 bom or an encoding
313 cookie as specified in pep-0263. If both a bom and a cookie are present,
314 but disagree, a SyntaxError will be raised. If the encoding cookie is an
315 invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
316 'utf-8-sig' is returned.
317
318 If no encoding is specified, then the default of 'utf-8' will be returned.
319 """
320 bom_found = False
321 encoding = None
322 default = 'utf-8'
323 def read_or_stop():
324 try:
325 return readline()
326 except StopIteration:
327 return b''
328
329 def find_cookie(line):
330 try:
331 # Decode as UTF-8. Either the line is an encoding declaration,
332 # in which case it should be pure ASCII, or it must be UTF-8
333 # per default encoding.
334 line_string = line.decode('utf-8')
335 except UnicodeDecodeError:
336 raise SyntaxError("invalid or missing encoding declaration")
337
338 matches = cookie_re.findall(line_string)
339 if not matches:
340 return None
341 encoding = _get_normal_name(matches[0])
342 try:
343 codec = lookup(encoding)
344 except LookupError:
345 # This behaviour mimics the Python interpreter
346 raise SyntaxError("unknown encoding: " + encoding)
347
348 if bom_found:
349 if encoding != 'utf-8':
350 # This behaviour mimics the Python interpreter
351 raise SyntaxError('encoding problem: utf-8')
352 encoding += '-sig'
353 return encoding
354
355 first = read_or_stop()
356 if first.startswith(BOM_UTF8):
357 bom_found = True
358 first = first[3:]
359 default = 'utf-8-sig'
360 if not first:
361 return default, []
362
363 encoding = find_cookie(first)
364 if encoding:
365 return encoding, [first]
366
367 second = read_or_stop()
368 if not second:
369 return default, [first]
370
371 encoding = find_cookie(second)
372 if encoding:
373 return encoding, [first, second]
374
375 return default, [first, second]
376
377
378 def open(filename):
379 """Open a file in read only mode using the encoding detected by
380 detect_encoding().
381 """
382 buffer = builtins.open(filename, 'rb')
383 encoding, lines = detect_encoding(buffer.readline)
384 buffer.seek(0)
385 text = TextIOWrapper(buffer, encoding, line_buffering=True)
386 text.mode = 'r'
387 return text
388
389
390 def tokenize(readline):
391 """
392 The tokenize() generator requires one argment, readline, which
393 must be a callable object which provides the same interface as the
394 readline() method of built-in file objects. Each call to the function
395 should return one line of input as bytes. Alternately, readline
396 can be a callable function terminating with StopIteration:
397 readline = open(myfile, 'rb').__next__ # Example of alternate readline
398
399 The generator produces 5-tuples with these members: the token type; the
400 token string; a 2-tuple (srow, scol) of ints specifying the row and
401 column where the token begins in the source; a 2-tuple (erow, ecol) of
402 ints specifying the row and column where the token ends in the source;
403 and the line on which the token was found. The line passed is the
404 logical line; continuation lines are included.
5 405
6 if sys.version_info[0] >= 3:
7 from ._tokenize_py3 import *
406 The first token sequence will always be an ENCODING token
407 which tells you which encoding was used to decode the bytes stream.
408 """
409 # This import is here to avoid problems when the itertools module is not
410 # built yet and tokenize is imported.
411 from itertools import chain, repeat
412 encoding, consumed = detect_encoding(readline)
413 rl_gen = iter(readline, b"")
414 empty = repeat(b"")
415 return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
416
417
418 def _tokenize(readline, encoding):
419 lnum = parenlev = continued = 0
420 numchars = '0123456789'
421 contstr, needcont = '', 0
422 contline = None
423 indents = [0]
424
425 if encoding is not None:
426 if encoding == "utf-8-sig":
427 # BOM will already have been stripped.
428 encoding = "utf-8"
429 yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
430 while True: # loop over lines in stream
431 try:
432 line = readline()
433 except StopIteration:
434 line = b''
435
436 if encoding is not None:
437 line = line.decode(encoding)
438 lnum += 1
439 pos, max = 0, len(line)
440
441 if contstr: # continued string
442 if not line:
443 raise TokenError("EOF in multi-line string", strstart)
444 endmatch = endprog.match(line)
445 if endmatch:
446 pos = end = endmatch.end(0)
447 yield TokenInfo(STRING, contstr + line[:end],
448 strstart, (lnum, end), contline + line)
449 contstr, needcont = '', 0
450 contline = None
451 elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
452 yield TokenInfo(ERRORTOKEN, contstr + line,
453 strstart, (lnum, len(line)), contline)
454 contstr = ''
455 contline = None
456 continue
457 else:
458 contstr = contstr + line
459 contline = contline + line
460 continue
461
462 elif parenlev == 0 and not continued: # new statement
463 if not line: break
464 column = 0
465 while pos < max: # measure leading whitespace
466 if line[pos] == ' ':
467 column += 1
468 elif line[pos] == '\t':
469 column = (column//tabsize + 1)*tabsize
470 elif line[pos] == '\f':
471 column = 0
472 else:
473 break
474 pos += 1
475 if pos == max:
476 break
477
478 if line[pos] in '#\r\n': # skip comments or blank lines
479 if line[pos] == '#':
480 comment_token = line[pos:].rstrip('\r\n')
481 nl_pos = pos + len(comment_token)
482 yield TokenInfo(COMMENT, comment_token,
483 (lnum, pos), (lnum, pos + len(comment_token)), line)
484 yield TokenInfo(NEWLINE, line[nl_pos:],
485 (lnum, nl_pos), (lnum, len(line)), line)
486 else:
487 yield TokenInfo(NEWLINE, line[pos:],
488 (lnum, pos), (lnum, len(line)), line)
489 continue
490
491 if column > indents[-1]: # count indents or dedents
492 indents.append(column)
493 yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
494 while column < indents[-1]:
495 if column not in indents:
496 raise IndentationError(
497 "unindent does not match any outer indentation level",
498 ("<tokenize>", lnum, pos, line))
499 indents = indents[:-1]
500 yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
501
502 else: # continued statement
503 if not line:
504 raise TokenError("EOF in multi-line statement", (lnum, 0))
505 continued = 0
506
507 while pos < max:
508 pseudomatch = pseudoprog.match(line, pos)
509 if pseudomatch: # scan for tokens
510 start, end = pseudomatch.span(1)
511 spos, epos, pos = (lnum, start), (lnum, end), end
512 token, initial = line[start:end], line[start]
513
514 if (initial in numchars or # ordinary number
515 (initial == '.' and token != '.' and token != '...')):
516 yield TokenInfo(NUMBER, token, spos, epos, line)
517 elif initial in '\r\n':
518 yield TokenInfo(NL if parenlev > 0 else NEWLINE,
519 token, spos, epos, line)
520 elif initial == '#':
521 assert not token.endswith("\n")
522 yield TokenInfo(COMMENT, token, spos, epos, line)
523 elif token in triple_quoted:
524 endprog = endprogs[token]
525 endmatch = endprog.match(line, pos)
526 if endmatch: # all on one line
527 pos = endmatch.end(0)
528 token = line[start:pos]
529 yield TokenInfo(STRING, token, spos, (lnum, pos), line)
530 else:
531 strstart = (lnum, start) # multiple lines
532 contstr = line[start:]
533 contline = line
534 break
535 elif initial in single_quoted or \
536 token[:2] in single_quoted or \
537 token[:3] in single_quoted:
538 if token[-1] == '\n': # continued string
539 strstart = (lnum, start)
540 endprog = (endprogs[initial] or endprogs[token[1]] or
541 endprogs[token[2]])
542 contstr, needcont = line[start:], 1
543 contline = line
544 break
545 else: # ordinary string
546 yield TokenInfo(STRING, token, spos, epos, line)
547 elif initial.isidentifier(): # ordinary name
548 yield TokenInfo(NAME, token, spos, epos, line)
549 elif initial == '\\': # continued stmt
550 continued = 1
551 else:
552 if initial in '([{':
553 parenlev += 1
554 elif initial in ')]}':
555 parenlev -= 1
556 yield TokenInfo(OP, token, spos, epos, line)
557 else:
558 yield TokenInfo(ERRORTOKEN, line[pos],
559 (lnum, pos), (lnum, pos+1), line)
560 pos += 1
561
562 for indent in indents[1:]: # pop remaining indent levels
563 yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
564 yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
565
566
567 # An undocumented, backwards compatible, API for all the places in the standard
568 # library that expect to be able to use tokenize with strings
569 def generate_tokens(readline):
570 return _tokenize(readline, None)
571
572 if __name__ == "__main__":
573 # Quick sanity check
574 s = b'''def parseline(self, line):
575 """Parse the line into a command name and a string containing
576 the arguments. Returns a tuple containing (command, args, line).
577 'command' and 'args' may be None if the line couldn't be parsed.
578 """
579 line = line.strip()
580 if not line:
581 return None, None, line
582 elif line[0] == '?':
583 line = 'help ' + line[1:]
584 elif line[0] == '!':
585 if hasattr(self, 'do_shell'):
586 line = 'shell ' + line[1:]
8 587 else:
9 from ._tokenize_py2 import *
588 return None, None, line
589 i, n = 0, len(line)
590 while i < n and line[i] in self.identchars: i = i+1
591 cmd, arg = line[:i], line[i:].strip()
592 return cmd, arg, line
593 '''
594 for tok in tokenize(iter(s.splitlines()).__next__):
595 print(tok)
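The patched tokenizer above keeps the standard-library interface (tokenize, generate_tokens, detect_encoding, untokenize), so it is driven exactly like the stdlib module. A hedged usage sketch, shown with the stdlib tokenize so it runs anywhere:

    import io
    import tokenize

    src = "x = 1  # a comment\n"
    # generate_tokens takes a readline callable and yields TokenInfo tuples:
    # (type, string, start, end, line).
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tok)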
@@ -41,8 +41,6 @@ See IPython `README.rst` file for more information:
41 41 print(error, file=sys.stderr)
42 42 sys.exit(1)
43 43
44 PY3 = (sys.version_info[0] >= 3)
45
46 44 # At least we're on the python version we need, move on.
47 45
48 46 #-------------------------------------------------------------------------------
@@ -114,9 +114,6 @@ if __name__ == "__main__":
114 114 print("DEPRECATE: backport_pr.py is deprecated and is is now recommended"
115 115 "to install `ghpro` from PyPI.", file=sys.stderr)
116 116
117 # deal with unicode
118 if sys.version_info < (3,):
119 sys.stdout = codecs.getwriter('utf8')(sys.stdout)
120 117
121 118 # Whether to add reST urls for all issues in printout.
122 119 show_urls = True
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
This diff has been collapsed as it changes many lines (595 lines changed); it is not shown here.