upstream/ipython Commit - r8309:aca66064

make source_to_unicode use BytesIO and refactor

Jörgen Stenarson -

r8309:aca66064

parent child

IPython/utils/openpy.py

0 +22 -16

@@ -7,9 +7,8 b' Much of the code is taken from the tokenize module in Python 3.2.'
7	from __future__ import absolute_import	7	from __future__ import absolute_import
8		8
9	import io	9	import io
10	from io import TextIOWrapper	10	from io import TextIOWrapper, BytesIO
11	import re	11	import re
12	from StringIO import StringIO
13	import urllib	12	import urllib
14		13
15	cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE)	14	cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE)
@@ -121,16 +120,31 b' except ImportError:'
121	text.mode = 'r'	120	text.mode = 'r'
122	return text	121	return text
123		122
124	def source_to_unicode(txt):	123	def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
125	"""Converts string with python source code to unicode	124	"""Converts a bytes string with python source code to unicode.
		125
		126	Unicode strings are passed through unchanged. Byte strings are checked
		127	for the python source file encoding cookie to determine encoding.
		128	txt can be either a bytes buffer or a string containing the source
		129	code.
126	"""	130	"""
127	if isinstance(txt, unicode):	131	if isinstance(txt, unicode):
128	return txt	132	return txt
		133	if isinstance(txt, str):
		134	buffer = BytesIO(txt)
		135	else:
		136	buffer = txt
129	try:	137	try:
130	coding, _ = detect_encoding(StringIO(txt).readline)	138	encoding, _ = detect_encoding(buffer.readline)
131	except SyntaxError:	139	except SyntaxError:
132	coding = "ascii"	140	encoding = "ascii"
133	return txt.decode(coding, errors="replace")	141	buffer.seek(0)
		142	text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
		143	text.mode = 'r'
		144	if skip_encoding_cookie:
		145	return u"".join(strip_encoding_cookie(text))
		146	else:
		147	return text.read()
134		148
135	def strip_encoding_cookie(filelike):	149	def strip_encoding_cookie(filelike):
136	"""Generator to pull lines from a text-mode file, skipping the encoding	150	"""Generator to pull lines from a text-mode file, skipping the encoding
@@ -193,12 +207,4 b" def read_py_url(url, errors='replace', skip_encoding_cookie=True):"
193	"""	207	"""
194	response = urllib.urlopen(url)	208	response = urllib.urlopen(url)
195	buffer = io.BytesIO(response.read())	209	buffer = io.BytesIO(response.read())
196	encoding, lines = detect_encoding(buffer.readline)	210	return source_to_unicode(buffer, errors, skip_encoding_cookie)
197	buffer.seek(0)
198	text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
199	text.mode = 'r'
200	if skip_encoding_cookie:
201	return "".join(strip_encoding_cookie(text))
202	else:
203	return text.read()
204

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages