##// END OF EJS Templates
Make source_to_unicode use BytesIO and refactor read_py_url to delegate to it
Jörgen Stenarson -
Show More
@@ -7,9 +7,8 b' Much of the code is taken from the tokenize module in Python 3.2.'
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import io
9 import io
10 from io import TextIOWrapper
10 from io import TextIOWrapper, BytesIO
11 import re
11 import re
12 from StringIO import StringIO
13 import urllib
12 import urllib
14
13
15 cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE)
14 cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE)
@@ -121,16 +120,31 b' except ImportError:'
121 text.mode = 'r'
120 text.mode = 'r'
122 return text
121 return text
123
122
124 def source_to_unicode(txt):
123 def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
125 """Converts string with python source code to unicode
124 """Converts a bytes string with python source code to unicode.
125
126 Unicode strings are passed through unchanged. Byte strings are checked
127 for the python source file encoding cookie to determine encoding.
128 txt can be either a bytes buffer or a string containing the source
129 code.
126 """
130 """
127 if isinstance(txt, unicode):
131 if isinstance(txt, unicode):
128 return txt
132 return txt
133 if isinstance(txt, str):
134 buffer = BytesIO(txt)
135 else:
136 buffer = txt
129 try:
137 try:
130 coding, _ = detect_encoding(StringIO(txt).readline)
138 encoding, _ = detect_encoding(buffer.readline)
131 except SyntaxError:
139 except SyntaxError:
132 coding = "ascii"
140 encoding = "ascii"
133 return txt.decode(coding, errors="replace")
141 buffer.seek(0)
142 text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
143 text.mode = 'r'
144 if skip_encoding_cookie:
145 return u"".join(strip_encoding_cookie(text))
146 else:
147 return text.read()
134
148
135 def strip_encoding_cookie(filelike):
149 def strip_encoding_cookie(filelike):
136 """Generator to pull lines from a text-mode file, skipping the encoding
150 """Generator to pull lines from a text-mode file, skipping the encoding
@@ -193,12 +207,4 b" def read_py_url(url, errors='replace', skip_encoding_cookie=True):"
193 """
207 """
194 response = urllib.urlopen(url)
208 response = urllib.urlopen(url)
195 buffer = io.BytesIO(response.read())
209 buffer = io.BytesIO(response.read())
196 encoding, lines = detect_encoding(buffer.readline)
210 return source_to_unicode(buffer, errors, skip_encoding_cookie)
197 buffer.seek(0)
198 text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
199 text.mode = 'r'
200 if skip_encoding_cookie:
201 return "".join(strip_encoding_cookie(text))
202 else:
203 return text.read()
204
General Comments 0
You need to be logged in to leave comments. Login now