Show More
@@ -7,9 +7,8 b' Much of the code is taken from the tokenize module in Python 3.2.' | |||||
7 | from __future__ import absolute_import |
|
7 | from __future__ import absolute_import | |
8 |
|
8 | |||
9 | import io |
|
9 | import io | |
10 | from io import TextIOWrapper |
|
10 | from io import TextIOWrapper, BytesIO | |
11 | import re |
|
11 | import re | |
12 | from StringIO import StringIO |
|
|||
13 | import urllib |
|
12 | import urllib | |
14 |
|
13 | |||
15 | cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE) |
|
14 | cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE) | |
@@ -121,16 +120,31 b' except ImportError:' | |||||
121 | text.mode = 'r' |
|
120 | text.mode = 'r' | |
122 | return text |
|
121 | return text | |
123 |
|
122 | |||
124 | def source_to_unicode(txt): |
|
123 | def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True): | |
125 | """Converts string with python source code to unicode |
|
124 | """Converts a bytes string with python source code to unicode. | |
|
125 | ||||
|
126 | Unicode strings are passed through unchanged. Byte strings are checked | |||
|
127 | for the python source file encoding cookie to determine encoding. | |||
|
128 | txt can be either a bytes buffer or a string containing the source | |||
|
129 | code. | |||
126 | """ |
|
130 | """ | |
127 | if isinstance(txt, unicode): |
|
131 | if isinstance(txt, unicode): | |
128 | return txt |
|
132 | return txt | |
|
133 | if isinstance(txt, str): | |||
|
134 | buffer = BytesIO(txt) | |||
|
135 | else: | |||
|
136 | buffer = txt | |||
129 | try: |
|
137 | try: | |
130 |
coding, _ = detect_encoding(StringIO(txt).readline) |
|
138 | encoding, _ = detect_encoding(buffer.readline) | |
131 | except SyntaxError: |
|
139 | except SyntaxError: | |
132 | coding = "ascii" |
|
140 | encoding = "ascii" | |
133 | return txt.decode(coding, errors="replace") |
|
141 | buffer.seek(0) | |
|
142 | text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) | |||
|
143 | text.mode = 'r' | |||
|
144 | if skip_encoding_cookie: | |||
|
145 | return u"".join(strip_encoding_cookie(text)) | |||
|
146 | else: | |||
|
147 | return text.read() | |||
134 |
|
148 | |||
135 | def strip_encoding_cookie(filelike): |
|
149 | def strip_encoding_cookie(filelike): | |
136 | """Generator to pull lines from a text-mode file, skipping the encoding |
|
150 | """Generator to pull lines from a text-mode file, skipping the encoding | |
@@ -193,12 +207,4 b" def read_py_url(url, errors='replace', skip_encoding_cookie=True):" | |||||
193 | """ |
|
207 | """ | |
194 | response = urllib.urlopen(url) |
|
208 | response = urllib.urlopen(url) | |
195 | buffer = io.BytesIO(response.read()) |
|
209 | buffer = io.BytesIO(response.read()) | |
196 | encoding, lines = detect_encoding(buffer.readline) |
|
210 | return source_to_unicode(buffer, errors, skip_encoding_cookie) | |
197 | buffer.seek(0) |
|
|||
198 | text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) |
|
|||
199 | text.mode = 'r' |
|
|||
200 | if skip_encoding_cookie: |
|
|||
201 | return "".join(strip_encoding_cookie(text)) |
|
|||
202 | else: |
|
|||
203 | return text.read() |
|
|||
204 |
|
General Comments 0
You need to be logged in to leave comments.
Login now