Show More
@@ -7,9 +7,8 b' Much of the code is taken from the tokenize module in Python 3.2.' | |||
|
7 | 7 | from __future__ import absolute_import |
|
8 | 8 | |
|
9 | 9 | import io |
|
10 | from io import TextIOWrapper | |
|
10 | from io import TextIOWrapper, BytesIO | |
|
11 | 11 | import re |
|
12 | from StringIO import StringIO | |
|
13 | 12 | import urllib |
|
14 | 13 | |
|
15 | 14 | cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE) |
@@ -121,16 +120,31 b' except ImportError:' | |||
|
121 | 120 | text.mode = 'r' |
|
122 | 121 | return text |
|
123 | 122 | |
|
124 | def source_to_unicode(txt): | |
|
125 | """Converts string with python source code to unicode | |
|
123 | def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True): | |
|
124 | """Converts a bytes string with python source code to unicode. | |
|
125 | ||
|
126 | Unicode strings are passed through unchanged. Byte strings are checked | |
|
127 | for the python source file encoding cookie to determine encoding. | |
|
128 | txt can be either a bytes buffer or a string containing the source | |
|
129 | code. | |
|
126 | 130 | """ |
|
127 | 131 | if isinstance(txt, unicode): |
|
128 | 132 | return txt |
|
133 | if isinstance(txt, str): | |
|
134 | buffer = BytesIO(txt) | |
|
135 | else: | |
|
136 | buffer = txt | |
|
129 | 137 | try: |
|
130 | coding, _ = detect_encoding(StringIO(txt).readline) | |
|
138 | encoding, _ = detect_encoding(buffer.readline) | |
|
131 | 139 | except SyntaxError: |
|
132 | coding = "ascii" | |
|
133 | return txt.decode(coding, errors="replace") | |
|
140 | encoding = "ascii" | |
|
141 | buffer.seek(0) | |
|
142 | text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) | |
|
143 | text.mode = 'r' | |
|
144 | if skip_encoding_cookie: | |
|
145 | return u"".join(strip_encoding_cookie(text)) | |
|
146 | else: | |
|
147 | return text.read() | |
|
134 | 148 | |
|
135 | 149 | def strip_encoding_cookie(filelike): |
|
136 | 150 | """Generator to pull lines from a text-mode file, skipping the encoding |
@@ -193,12 +207,4 b" def read_py_url(url, errors='replace', skip_encoding_cookie=True):" | |||
|
193 | 207 | """ |
|
194 | 208 | response = urllib.urlopen(url) |
|
195 | 209 | buffer = io.BytesIO(response.read()) |
|
196 | encoding, lines = detect_encoding(buffer.readline) | |
|
197 | buffer.seek(0) | |
|
198 | text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) | |
|
199 | text.mode = 'r' | |
|
200 | if skip_encoding_cookie: | |
|
201 | return "".join(strip_encoding_cookie(text)) | |
|
202 | else: | |
|
203 | return text.read() | |
|
204 | ||
|
210 | return source_to_unicode(buffer, errors, skip_encoding_cookie) |
General Comments 0
You need to be logged in to leave comments.
Login now