##// END OF EJS Templates
Make source_to_unicode use BytesIO, accept errors/skip_encoding_cookie options, and refactor read_py_url to delegate to it
Jörgen Stenarson -
Show More
@@ -7,9 +7,8 b' Much of the code is taken from the tokenize module in Python 3.2.'
7 7 from __future__ import absolute_import
8 8
9 9 import io
10 from io import TextIOWrapper
10 from io import TextIOWrapper, BytesIO
11 11 import re
12 from StringIO import StringIO
13 12 import urllib
14 13
15 14 cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE)
@@ -121,16 +120,31 b' except ImportError:'
121 120 text.mode = 'r'
122 121 return text
123 122
124 def source_to_unicode(txt):
125 """Converts string with python source code to unicode
123 def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
124 """Converts a bytes string with python source code to unicode.
125
126 Unicode strings are passed through unchanged. Byte strings are checked
127 for the python source file encoding cookie to determine encoding.
128 txt can be either a bytes buffer or a string containing the source
129 code.
126 130 """
127 131 if isinstance(txt, unicode):
128 132 return txt
133 if isinstance(txt, str):
134 buffer = BytesIO(txt)
135 else:
136 buffer = txt
129 137 try:
130 coding, _ = detect_encoding(StringIO(txt).readline)
138 encoding, _ = detect_encoding(buffer.readline)
131 139 except SyntaxError:
132 coding = "ascii"
133 return txt.decode(coding, errors="replace")
140 encoding = "ascii"
141 buffer.seek(0)
142 text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
143 text.mode = 'r'
144 if skip_encoding_cookie:
145 return u"".join(strip_encoding_cookie(text))
146 else:
147 return text.read()
134 148
135 149 def strip_encoding_cookie(filelike):
136 150 """Generator to pull lines from a text-mode file, skipping the encoding
@@ -193,12 +207,4 b" def read_py_url(url, errors='replace', skip_encoding_cookie=True):"
193 207 """
194 208 response = urllib.urlopen(url)
195 209 buffer = io.BytesIO(response.read())
196 encoding, lines = detect_encoding(buffer.readline)
197 buffer.seek(0)
198 text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
199 text.mode = 'r'
200 if skip_encoding_cookie:
201 return "".join(strip_encoding_cookie(text))
202 else:
203 return text.read()
204
210 return source_to_unicode(buffer, errors, skip_encoding_cookie)
General Comments 0
You need to be logged in to leave comments. Log in now