##// END OF EJS Templates
Fix getting unicode lines in IPython.core.debugger.
Thomas Kluyver -
Show More
@@ -0,0 +1,38 b''
1 """Wrapper around linecache which decodes files to unicode according to PEP 263.
2
3 This is only needed for Python 2 - linecache in Python 3 does the same thing
4 itself.
5 """
6 import functools
7 import linecache
8
9 from IPython.utils import py3compat
10 from IPython.utils import openpy
11
if not py3compat.PY3:
    def getlines(filename, module_globals=None):
        """Return the cached lines of *filename*, decoded to unicode.

        The byte lines come from ``linecache.getlines``. The encoding is
        detected from the first lines (PEP 263); if detection raises
        ``SyntaxError`` ASCII is assumed. Undecodable bytes are replaced
        instead of raising.
        """
        raw_lines = linecache.getlines(filename, module_globals=module_globals)
        next_raw_line = openpy._list_readline(raw_lines)
        try:
            encoding, _ = openpy.detect_encoding(next_raw_line)
        except SyntaxError:
            encoding = 'ascii'
        return [raw.decode(encoding, 'replace') for raw in raw_lines]

    # Same logic as linecache.getline, built on the decoding getlines above.
    def getline(filename, lineno, module_globals=None):
        """Return line *lineno* (1-based) of *filename*, or '' if out of range."""
        lines = getlines(filename, module_globals)
        if lineno < 1 or lineno > len(lines):
            return ''
        return lines[lineno - 1]

else:
    # On Python 3 linecache does the decoding itself, so reuse it directly.
    getline = linecache.getline

    # getlines has to be looked up at runtime, because doctests monkeypatch it.
    @functools.wraps(linecache.getlines)
    def getlines(filename, module_globals=None):
        return linecache.getlines(filename, module_globals=module_globals)
@@ -1,557 +1,529 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Pdb debugger class.
4 4
5 5 Modified from the standard pdb.Pdb class to avoid including readline, so that
6 6 the command line completion of other programs which include this isn't
7 7 damaged.
8 8
9 9 In the future, this class will be expanded with improvements over the standard
10 10 pdb.
11 11
12 12 The code in this file is mainly lifted out of cmd.py in Python 2.2, with minor
13 13 changes. Licensing should therefore be under the standard Python terms. For
14 14 details on the PSF (Python Software Foundation) standard license, see:
15 15
16 16 http://www.python.org/2.2.3/license.html"""
17 17
18 18 #*****************************************************************************
19 19 #
20 20 # This file is licensed under the PSF license.
21 21 #
22 22 # Copyright (C) 2001 Python Software Foundation, www.python.org
23 23 # Copyright (C) 2005-2006 Fernando Perez. <fperez@colorado.edu>
24 24 #
25 25 #
26 26 #*****************************************************************************
27 27 from __future__ import print_function
28 28
29 29 import bdb
30 30 import linecache
31 31 import sys
32 32
33 from IPython.utils import PyColorize, py3compat
33 from IPython.utils import PyColorize, ulinecache
34 34 from IPython.core import ipapi
35 35 from IPython.utils import coloransi, io, openpy
36 36 from IPython.core.excolors import exception_colors
37 37
# Decide which debugger base class to build on: pydb when explicitly
# requested and usable, otherwise the standard library's pdb.
has_pydb = False
prompt = 'ipdb> '
#We have to check this directly from sys.argv, config struct not yet available
if '--pydb' in sys.argv:
    try:
        import pydb
    except ImportError:
        print("Pydb (http://bashdb.sourceforge.net/pydb/) does not seem to be available")
    else:
        # Version 1.17 is broken, and that's what ships with Ubuntu Edgy, so we
        # better protect against it.
        if hasattr(pydb.pydb, "runl") and pydb.version>'1.17':
            has_pydb = True

if not has_pydb:
    from pdb import Pdb as OldPdb
else:
    from pydb import Pdb as OldPdb
    #print "Using pydb for %run -d and post-mortem" #dbg
    prompt = 'ipydb> '
58 58
59 59 # Allow the set_trace code to operate outside of an ipython instance, even if
60 60 # it does so with some limitations. The rest of this support is implemented in
61 61 # the Tracer constructor.
def BdbQuit_excepthook(et,ev,tb):
    """``sys.excepthook`` replacement that silences ``bdb.BdbQuit``.

    When the exception type is exactly ``bdb.BdbQuit`` a short notice is
    printed; anything else is forwarded to the hook saved in the
    ``excepthook_ori`` attribute of this function.
    """
    if et==bdb.BdbQuit:
        print('Exiting Debugger.')
        return
    BdbQuit_excepthook.excepthook_ori(et,ev,tb)
67 67
def BdbQuit_IPython_excepthook(self,et,ev,tb,tb_offset=None):
    """Custom exception handler that ``Tracer`` registers for ``bdb.BdbQuit``.

    Every argument is ignored; only a short notice is printed.
    """
    notice = 'Exiting Debugger.'
    print(notice)
70 70
71 71
class Tracer(object):
    """Class for local debugging, similar to pdb.set_trace.

    Calling an instance behaves like pdb.set_trace, but uses the enhanced
    ``Pdb`` class defined in this module.

    It is a class that must be instantiated in your own code (rather than a
    plain function) because the constructor detects, at runtime, whether
    IPython is already active and installs the matching exception hook.
    """

    def __init__(self,colors=None):
        """Create a local debugger instance.

        :Parameters:

          - `colors` (None): a string containing the name of the color scheme
            to use, it must be one of IPython's valid color schemes. If not
            given, it defaults to the current IPython scheme when running
            inside IPython, and to 'NoColor' otherwise.

        Usage example:

          from IPython.core.debugger import Tracer; debug_here = Tracer()

          ... later in your code
          debug_here() # -> will open up the debugger at that point.

        Once the debugger activates, you can use all of its regular commands
        to step through code, set breakpoints, etc. See the pdb documentation
        from the Python standard library for usage details.
        """
        try:
            shell = get_ipython()
        except NameError:
            # No IPython around: install our own sys.excepthook, remembering
            # the previous one so non-BdbQuit errors can be forwarded to it.
            BdbQuit_excepthook.excepthook_ori = sys.excepthook
            sys.excepthook = BdbQuit_excepthook
            fallback_colors = 'NoColor'
            try:
                # Limited tab completion support
                import readline
                readline.parse_and_bind('tab: complete')
            except ImportError:
                pass
        else:
            # Inside IPython: use its custom exception handler mechanism.
            fallback_colors = shell.colors
            shell.set_custom_exc((bdb.BdbQuit,), BdbQuit_IPython_excepthook)

        if colors is None:
            colors = fallback_colors

        # The stdlib debugger uses the `repr` module's aRepr, which truncates
        # printed strings at 30 characters; raise that to 80.
        try:
            from repr import aRepr
            aRepr.maxstring = 80
        except:
            # Purely a user-facing convenience: report the problem (so API
            # changes get noticed) but carry on.
            import traceback
            traceback.print_exc()

        self.debugger = Pdb(colors)

    def __call__(self):
        """Start an interactive debugger in the frame of the caller.

        Similar to pdb.set_trace() from the standard library, but using this
        module's enhanced debugger."""
        caller_frame = sys._getframe().f_back
        self.debugger.set_trace(caller_frame)
152 152
153 153
def decorate_fn_with_doc(new_fn, old_fn, additional_text=""):
    """Return a wrapper around new_fn that carries old_fn's doc string.

    If old_fn has a non-empty doc string, the wrapper's doc string is that
    text followed by additional_text; otherwise the wrapper gets none.
    Particularly useful for the do_... commands that hook into the help
    system. Adapted from a comp.lang.python posting by Duncan Booth."""
    inherited_doc = old_fn.__doc__

    def wrapper(*args, **kw):
        return new_fn(*args, **kw)

    if inherited_doc:
        wrapper.__doc__ = inherited_doc + additional_text
    return wrapper
164 164
165 165
166 166 def _file_lines(fname):
167 167 """Return the contents of a named file as a list of lines.
168 168
169 169 This function never raises an IOError exception: if the file can't be
170 170 read, it simply returns an empty list."""
171 171
172 172 try:
173 173 outfile = open(fname)
174 174 except IOError:
175 175 return []
176 176 else:
177 177 out = outfile.readlines()
178 178 outfile.close()
179 179 return out
180 180
181 181
182 def _readline(x):
183 """helper to pop elements off list of string
184
185 call with list of strings, return readline function that will pop
186 one line off the beginning of a copy of the list with each call.
187 raise StopIteration when empty or on third call
188 """
189 x = x[:2]
190 def readline():
191 if x:
192 return x.pop(0)
193 else:
194 raise StopIteration
195 return readline
196
197
198 182 class Pdb(OldPdb):
199 183 """Modified Pdb class, does not load readline."""
200 184
def __init__(self,color_scheme='NoColor',completekey=None,
             stdin=None, stdout=None):
    """Set up the debugger on top of the selected base class (pdb or pydb).

    color_scheme names the scheme activated in the colour table; the other
    arguments are forwarded to the parent constructor.
    """
    # Parent constructor:
    if has_pydb and completekey is None:
        OldPdb.__init__(self,stdin=stdin,stdout=io.stdout)
    else:
        OldPdb.__init__(self,completekey,stdin,stdout)

    self.prompt = prompt # The default prompt is '(Pdb)'

    # IPython changes...
    self.is_pydb = has_pydb
    self.shell = ipapi.get()

    if self.is_pydb:
        # interactiveshell.py's ipalias seems to want pdb's checkline
        # which located in pydb.fn
        import pydb.fns
        self.checkline = lambda filename, lineno: \
                         pydb.fns.checkline(self, filename, lineno)

        self.curframe = None
        self.do_restart = self.new_do_restart

        # Remember the shell's completion function so new_do_quit can
        # restore it.
        self.old_all_completions = self.shell.Completer.all_completions
        self.shell.Completer.all_completions=self.all_completions

        self.do_list = decorate_fn_with_doc(self.list_command_pydb,
                                            OldPdb.do_list)
        self.do_l = self.do_list
        self.do_frame = decorate_fn_with_doc(self.new_do_frame,
                                             OldPdb.do_frame)

    self.aliases = {}

    # Create color table: we copy the default one from the traceback
    # module and add a few attributes needed for debugging
    self.color_scheme_table = exception_colors()

    # shorthands
    C = coloransi.TermColors
    cst = self.color_scheme_table

    # (scheme name, colour of enabled breakpoints, colour of disabled ones)
    breakpoint_palette = (
        ('NoColor', C.NoColor, C.NoColor),
        ('Linux', C.LightRed, C.Red),
        ('LightBG', C.LightRed, C.Red),
    )
    for scheme_name, enabled_color, disabled_color in breakpoint_palette:
        scheme_colors = cst[scheme_name].colors
        scheme_colors.breakpoint_enabled = enabled_color
        scheme_colors.breakpoint_disabled = disabled_color

    self.set_colors(color_scheme)

    # Add a python parser so we can syntax highlight source while
    # debugging.
    self.parser = PyColorize.Parser()
261 245
def set_colors(self, scheme):
    """Activate the named scheme in this debugger's color scheme table."""
    table = self.color_scheme_table
    table.set_active_scheme(scheme)
265 249
def interaction(self, frame, traceback):
    """Point the shell completer at frame, then run the parent's interaction."""
    shell = self.shell
    shell.set_completer_frame(frame)
    OldPdb.interaction(self, frame, traceback)
269 253
def new_do_up(self, arg):
    """Run the parent's do_up, then retarget the shell completer."""
    OldPdb.do_up(self, arg)
    current = self.curframe
    self.shell.set_completer_frame(current)

# Both command names share one wrapper that carries the parent's help text.
do_u = do_up = decorate_fn_with_doc(new_do_up, OldPdb.do_up)
274 258
def new_do_down(self, arg):
    """Run the parent's do_down, then retarget the shell completer."""
    OldPdb.do_down(self, arg)
    current = self.curframe
    self.shell.set_completer_frame(current)

# Both command names share one wrapper that carries the parent's help text.
do_d = do_down = decorate_fn_with_doc(new_do_down, OldPdb.do_down)
280 264
def new_do_frame(self, arg):
    """Run the parent's do_frame, then retarget the shell completer."""
    OldPdb.do_frame(self, arg)
    current = self.curframe
    self.shell.set_completer_frame(current)
284 268
def new_do_quit(self, arg):
    """Restore the shell's saved completion function (if any), then quit."""
    # old_all_completions only exists when __init__ replaced the completer.
    if hasattr(self, 'old_all_completions'):
        saved = self.old_all_completions
        self.shell.Completer.all_completions = saved

    return OldPdb.do_quit(self, arg)

do_q = do_quit = decorate_fn_with_doc(new_do_quit, OldPdb.do_quit)
294 278
def new_do_restart(self, arg):
    """Restart command. In the context of ipython this is exactly the same
    thing as 'quit'."""
    notice = "Restart doesn't make sense here. Using 'quit' instead."
    self.msg(notice)
    return self.do_quit(arg)
300 284
def postloop(self):
    """Clear the shell completer's frame by setting it to None."""
    shell = self.shell
    shell.set_completer_frame(None)
303 287
def print_stack_trace(self):
    """Print every entry of self.stack with five lines of context.

    A KeyboardInterrupt stops the listing quietly."""
    try:
        for entry in self.stack:
            self.print_stack_entry(entry, context = 5)
    except KeyboardInterrupt:
        # Interrupting a long listing is not an error.
        pass
310 294
def print_stack_entry(self,frame_lineno,prompt_prefix='\n-> ',
                      context = 3):
    """Print one formatted stack entry and notify the editor-sync hook.

    frame_lineno is a (frame, lineno) pair; prompt_prefix is accepted but
    not used here."""
    entry_text = self.format_stack_entry(frame_lineno, '', context)
    print(entry_text, file=io.stdout)

    # vds: >>
    frame, lineno = frame_lineno
    filename = frame.f_code.co_filename
    self.shell.hooks.synchronize_with_editor(filename, lineno, 0)
    # vds: <<
321 305
def format_stack_entry(self, frame_lineno, lprefix=': ', context = 3):
    """Return a colorized, multi-line description of one stack entry.

    frame_lineno is a (frame, lineno) pair. The result holds the repr of a
    pending return value (when the frame has one), a
    "filename(lineno)function(args)" header and up to `context` source
    lines around lineno. lprefix is accepted but not used here.
    """
    import repr

    ret = []

    Colors = self.color_scheme_table.active_colors
    ColorsNormal = Colors.Normal
    tpl_link = '%s%%s%s' % (Colors.filenameEm, ColorsNormal)
    tpl_call = '%s%%s%s%%s%s' % (Colors.vName, Colors.valEm, ColorsNormal)
    tpl_line = '%%s%s%%s %s%%s' % (Colors.lineno, ColorsNormal)
    tpl_line_em = '%%s%s%%s %s%%s%s' % (Colors.linenoEm, Colors.line,
                                        ColorsNormal)

    frame, lineno = frame_lineno

    return_value = ''
    if '__return__' in frame.f_locals:
        rv = frame.f_locals['__return__']
        #return_value += '->'
        return_value += repr.repr(rv) + '\n'
    ret.append(return_value)

    #s = filename + '(' + `lineno` + ')'
    filename = self.canonic(frame.f_code.co_filename)
    link = tpl_link % filename

    func = frame.f_code.co_name or "<lambda>"

    call = ''
    if func != '?':
        if '__args__' in frame.f_locals:
            args = repr.repr(frame.f_locals['__args__'])
        else:
            args = '()'
        call = tpl_call % (func, args)

    # The level info should be generated in the same format pdb uses, to
    # avoid breaking the pdbtrack functionality of python-mode in *emacs.
    if frame is self.curframe:
        ret.append('> ')
    else:
        ret.append(' ')
    ret.append('%s(%s)%s\n' % (link,lineno,call))

    lines = ulinecache.getlines(filename)
    # Clamp the window start. The upper bound must be applied BEFORE the
    # lower one: for a file shorter than `context`, len(lines) - context is
    # negative, and clamping to 0 last keeps the displayed line numbers
    # (start + 1 + i) and the arrow position correct.
    start = lineno - 1 - context//2
    start = min(start, len(lines) - context)
    start = max(start, 0)
    lines = lines[start : start + context]

    for i,line in enumerate(lines):
        show_arrow = (start + 1 + i == lineno)
        if frame is self.curframe or show_arrow:
            linetpl = tpl_line_em
        else:
            linetpl = tpl_line
        ret.append(self.__format_line(linetpl, filename,
                                      start + 1 + i, line,
                                      arrow = show_arrow) )
    return ''.join(ret)
388 368
389 369 def __format_line(self, tpl_line, filename, lineno, line, arrow = False):
390 370 bp_mark = ""
391 371 bp_mark_color = ""
392 372
393 373 scheme = self.color_scheme_table.active_scheme_name
394 374 new_line, err = self.parser.format2(line, 'str', scheme)
395 375 if not err: line = new_line
396 376
397 377 bp = None
398 378 if lineno in self.get_file_breaks(filename):
399 379 bps = self.get_breaks(filename, lineno)
400 380 bp = bps[-1]
401 381
402 382 if bp:
403 383 Colors = self.color_scheme_table.active_colors
404 384 bp_mark = str(bp.number)
405 385 bp_mark_color = Colors.breakpoint_enabled
406 386 if not bp.enabled:
407 387 bp_mark_color = Colors.breakpoint_disabled
408 388
409 389 numbers_width = 7
410 390 if arrow:
411 391 # This is the line with the error
412 392 pad = numbers_width - len(str(lineno)) - len(bp_mark)
413 393 if pad >= 3:
414 394 marker = '-'*(pad-3) + '-> '
415 395 elif pad == 2:
416 396 marker = '> '
417 397 elif pad == 1:
418 398 marker = '>'
419 399 else:
420 400 marker = ''
421 401 num = '%s%s' % (marker, str(lineno))
422 402 line = tpl_line % (bp_mark_color + bp_mark, num, line)
423 403 else:
424 404 num = '%*s' % (numbers_width - len(bp_mark), str(lineno))
425 405 line = tpl_line % (bp_mark_color + bp_mark, num, line)
426 406
427 407 return line
428 408
def list_command_pydb(self, arg):
    """List command to use if we have a newer pydb installed"""
    parsed = OldPdb.parse_list_cmd(self, arg)
    filename, first, last = parsed
    # Nothing is printed when the parser returns no filename.
    if filename is None:
        return
    self.print_list_lines(filename, first, last)
434 414
def print_list_lines(self, filename, first, last):
    """The printing (as opposed to the parsing) part of a 'list' command.

    Prints lines first..last (inclusive, 1-based) of filename to io.stdout,
    stopping early at the first missing/empty line. The frame's current
    line is emphasized and marked with an arrow. self.lineno is updated to
    the last line printed. A KeyboardInterrupt ends the listing quietly.
    """
    try:
        Colors = self.color_scheme_table.active_colors
        ColorsNormal = Colors.Normal
        tpl_line = '%%s%s%%s %s%%s' % (Colors.lineno, ColorsNormal)
        tpl_line_em = '%%s%s%%s %s%%s%s' % (Colors.linenoEm, Colors.line, ColorsNormal)
        src = []
        if filename == "<string>" and hasattr(self, "_exec_filename"):
            filename = self._exec_filename

        for lineno in range(first, last+1):
            # The fetched text must be bound to `line`: it is tested and
            # formatted below (the call's result was previously discarded).
            line = ulinecache.getline(filename, lineno)
            if not line:
                break

            if lineno == self.curframe.f_lineno:
                line = self.__format_line(tpl_line_em, filename, lineno, line, arrow = True)
            else:
                line = self.__format_line(tpl_line, filename, lineno, line, arrow = False)

            src.append(line)
            self.lineno = lineno

        print(''.join(src), file=io.stdout)

    except KeyboardInterrupt:
        pass
472 444
def do_list(self, arg):
    """List source around a line of the current frame's file.

    arg is evaluated as a Python expression: a 2-tuple means (first, last)
    where a `last` smaller than `first` is taken as a count; any other
    value is a line number to center on. Without arg the listing starts
    around the current line, or continues after the last listed line.
    """
    self.lastcmd = 'list'
    last = None
    if not arg:
        if self.lineno is None:
            first = max(1, self.curframe.f_lineno - 5)
        else:
            first = self.lineno + 1
    else:
        try:
            x = eval(arg, {}, {})
            if type(x) is tuple:
                first, last = x
                first = int(first)
                last = int(last)
                if last < first:
                    # Assume it's a count
                    last = first + last
            else:
                first = max(1, int(x) - 5)
        except:
            print('*** Error in argument:', repr(arg))
            return
    if last is None:
        last = first + 10
    self.print_list_lines(self.curframe.f_code.co_filename, first, last)

    # vds: >>
    filename = self.curframe.f_code.co_filename
    self.shell.hooks.synchronize_with_editor(filename, first, 0)
    # vds: <<

do_l = do_list
506 478
def do_pdef(self, arg):
    """The debugger interface to magic_pdef"""
    frame = self.curframe
    namespaces = [('Locals', frame.f_locals), ('Globals', frame.f_globals)]
    pdef = self.shell.find_line_magic('pdef')
    pdef(arg, namespaces=namespaces)
512 484
def do_pdoc(self, arg):
    """The debugger interface to magic_pdoc"""
    frame = self.curframe
    namespaces = [('Locals', frame.f_locals), ('Globals', frame.f_globals)]
    pdoc = self.shell.find_line_magic('pdoc')
    pdoc(arg, namespaces=namespaces)
518 490
def do_pinfo(self, arg):
    """The debugger equivalent of ?obj"""
    frame = self.curframe
    namespaces = [('Locals', frame.f_locals), ('Globals', frame.f_globals)]
    pinfo = self.shell.find_line_magic('pinfo')
    # The magic is handed a full "pinfo <arg>" line, not just the argument.
    pinfo("pinfo %s" % arg, namespaces=namespaces)
525 497
def checkline(self, filename, lineno):
    """Check whether specified line seems to be executable.

    Return `lineno` if it is, 0 if not (e.g. a docstring, comment, blank
    line or EOF). Warning: testing is not comprehensive.
    """
    #######################################################################
    # XXX Hack! Use python-2.5 compatible code for this call, because with
    # all of our changes, we've drifted from the pdb api in 2.6. The 2.6
    # version passes self.curframe.f_globals as a third argument:
    #
    #line = linecache.getline(filename, lineno, self.curframe.f_globals)
    #
    # Dropping that argument does the trick. But in reality, we need to fix
    # this by reconciling our updates with the new Pdb APIs in Python 2.6.
    #######################################################################
    line = linecache.getline(filename, lineno)

    if not line:
        print('End of file', file=self.stdout)
        return 0

    stripped = line.strip()
    # Don't allow setting breakpoint at a blank line, a comment or the
    # opening of a triple-quoted string.
    if not stripped or stripped.startswith(('#', '"""', "'''")):
        print('*** Blank or comment', file=self.stdout)
        return 0
    return lineno
@@ -1,210 +1,219 b''
1 1 """
2 2 Tools to open .py files as Unicode, using the encoding specified within the file,
3 3 as per PEP 263.
4 4
5 5 Much of the code is taken from the tokenize module in Python 3.2.
6 6 """
7 7 from __future__ import absolute_import
8 8
9 9 import io
10 10 from io import TextIOWrapper, BytesIO
11 11 import re
12 12 import urllib
13 13
14 14 cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE)
15 15 cookie_comment_re = re.compile(ur"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
16 16
17 17 try:
18 18 # Available in Python 3
19 19 from tokenize import detect_encoding
20 20 except ImportError:
21 21 from codecs import lookup, BOM_UTF8
22 22
23 23 # Copied from Python 3.2 tokenize
24 24 def _get_normal_name(orig_enc):
25 25 """Imitates get_normal_name in tokenizer.c."""
26 26 # Only care about the first 12 characters.
27 27 enc = orig_enc[:12].lower().replace("_", "-")
28 28 if enc == "utf-8" or enc.startswith("utf-8-"):
29 29 return "utf-8"
30 30 if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
31 31 enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
32 32 return "iso-8859-1"
33 33 return orig_enc
34 34
35 35 # Copied from Python 3.2 tokenize
36 36 def detect_encoding(readline):
37 37 """
38 38 The detect_encoding() function is used to detect the encoding that should
39 39 be used to decode a Python source file. It requires one argment, readline,
40 40 in the same way as the tokenize() generator.
41 41
42 42 It will call readline a maximum of twice, and return the encoding used
43 43 (as a string) and a list of any lines (left as bytes) it has read in.
44 44
45 45 It detects the encoding from the presence of a utf-8 bom or an encoding
46 46 cookie as specified in pep-0263. If both a bom and a cookie are present,
47 47 but disagree, a SyntaxError will be raised. If the encoding cookie is an
48 48 invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
49 49 'utf-8-sig' is returned.
50 50
51 51 If no encoding is specified, then the default of 'utf-8' will be returned.
52 52 """
53 53 bom_found = False
54 54 encoding = None
55 55 default = 'utf-8'
56 56 def read_or_stop():
57 57 try:
58 58 return readline()
59 59 except StopIteration:
60 60 return b''
61 61
62 62 def find_cookie(line):
63 63 try:
64 64 line_string = line.decode('ascii')
65 65 except UnicodeDecodeError:
66 66 return None
67 67
68 68 matches = cookie_re.findall(line_string)
69 69 if not matches:
70 70 return None
71 71 encoding = _get_normal_name(matches[0])
72 72 try:
73 73 codec = lookup(encoding)
74 74 except LookupError:
75 75 # This behaviour mimics the Python interpreter
76 76 raise SyntaxError("unknown encoding: " + encoding)
77 77
78 78 if bom_found:
79 79 if codec.name != 'utf-8':
80 80 # This behaviour mimics the Python interpreter
81 81 raise SyntaxError('encoding problem: utf-8')
82 82 encoding += '-sig'
83 83 return encoding
84 84
85 85 first = read_or_stop()
86 86 if first.startswith(BOM_UTF8):
87 87 bom_found = True
88 88 first = first[3:]
89 89 default = 'utf-8-sig'
90 90 if not first:
91 91 return default, []
92 92
93 93 encoding = find_cookie(first)
94 94 if encoding:
95 95 return encoding, [first]
96 96
97 97 second = read_or_stop()
98 98 if not second:
99 99 return default, [first]
100 100
101 101 encoding = find_cookie(second)
102 102 if encoding:
103 103 return encoding, [first, second]
104 104
105 105 return default, [first, second]
106 106
try:
    # Available in Python 3.2 and above.
    from tokenize import open
except ImportError:
    # Copied from Python 3.2 tokenize
    def open(filename):
        """Open a file in read only mode using the encoding detected by
        detect_encoding().

        Returns a text-mode wrapper positioned at the start of the file.
        If detection or wrapping fails, the underlying binary file is
        closed before the exception propagates.
        """
        buffer = io.open(filename, 'rb')   # Tweaked to use io.open for Python 2
        try:
            encoding, lines = detect_encoding(buffer.readline)
            buffer.seek(0)
            text = TextIOWrapper(buffer, encoding, line_buffering=True)
            text.mode = 'r'
            return text
        except:
            # Don't leak the file handle on failure; the error is re-raised
            # unchanged (later CPython versions of tokenize.open do the same).
            buffer.close()
            raise
122 122
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Converts a bytes string with python source code to unicode.

    Unicode strings are passed through unchanged. Byte strings are checked
    for the python source file encoding cookie to determine encoding; if
    detection raises SyntaxError, ASCII is used. txt can be either a bytes
    buffer or a string containing the source code. With
    skip_encoding_cookie the result is assembled from the lines yielded by
    strip_encoding_cookie().
    """
    if isinstance(txt, unicode):
        return txt

    buffer = BytesIO(txt) if isinstance(txt, bytes) else txt
    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        encoding = "ascii"
    # Detection consumed up to two lines; rewind before decoding.
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
    text.mode = 'r'
    if not skip_encoding_cookie:
        return text.read()
    return u"".join(strip_encoding_cookie(text))
148 148
def strip_encoding_cookie(filelike):
    """Generator to pull lines from a text-mode file, skipping the encoding
    cookie if it is found in the first two lines.
    """
    it = iter(filelike)
    try:
        # Only the first two lines are checked against the cookie pattern.
        for _ in range(2):
            candidate = next(it)
            if not cookie_comment_re.match(candidate):
                yield candidate
    except StopIteration:
        return

    for line in it:
        yield line
166 166
def read_py_file(filename, skip_encoding_cookie=True):
    """Read a Python file, using the encoding declared inside the file.

    Parameters
    ----------
    filename : str
      The path to the file to read.
    skip_encoding_cookie : bool
      If True (the default), and the encoding declaration is found in the first
      two lines, that line will be excluded from the output - compiling a
      unicode string with an encoding declaration is a SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    with open(filename) as f:   # the open function defined in this module.
        if not skip_encoding_cookie:
            return f.read()
        return "".join(strip_encoding_cookie(f))
188 188
def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, using the encoding declared inside the file.

    Parameters
    ----------
    url : str
      The URL from which to fetch the file.
    errors : str
      How to handle decoding errors in the file. Options are the same as for
      bytes.decode(), but here 'replace' is the default.
    skip_encoding_cookie : bool
      If True (the default), and the encoding declaration is found in the first
      two lines, that line will be excluded from the output - compiling a
      unicode string with an encoding declaration is a SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # Fetch the whole body, then decode it through an in-memory buffer.
    payload = urllib.urlopen(url).read()
    return source_to_unicode(io.BytesIO(payload), errors, skip_encoding_cookie)
211
212 def _list_readline(x):
213 """Given a list, returns a readline() function that returns the next element
214 with each call.
215 """
216 x = iter(x)
217 def readline():
218 return next(x)
219 return readline
General Comments 0
You need to be logged in to leave comments. Login now