##// END OF EJS Templates
Merge pull request #10131 from srinivasreddy/deprecate_openpy...
Thomas Kluyver -
r23076:f948ff0d merge
parent child Browse files
Show More
@@ -1,247 +1,122 b''
1 """
1 """
2 Tools to open .py files as Unicode, using the encoding specified within the file,
2 Tools to open .py files as Unicode, using the encoding specified within the file,
3 as per PEP 263.
3 as per PEP 263.
4
4
5 Much of the code is taken from the tokenize module in Python 3.2.
5 Much of the code is taken from the tokenize module in Python 3.2.
6 """
6 """
7
7
8 import io
8 import io
9 from io import TextIOWrapper, BytesIO
9 from io import TextIOWrapper, BytesIO
10 import os.path
10 import os.path
11 import re
11 import re
12
12 from tokenize import open, detect_encoding
13
13
14 cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)", re.UNICODE)
14 cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)", re.UNICODE)
15 cookie_comment_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
15 cookie_comment_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
16
16
17 try:
18 # Available in Python 3
19 from tokenize import detect_encoding
20 except ImportError:
21 from codecs import lookup, BOM_UTF8
22
23 # Copied from Python 3.2 tokenize
24 def _get_normal_name(orig_enc):
25 """Imitates get_normal_name in tokenizer.c."""
26 # Only care about the first 12 characters.
27 enc = orig_enc[:12].lower().replace("_", "-")
28 if enc == "utf-8" or enc.startswith("utf-8-"):
29 return "utf-8"
30 if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
31 enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
32 return "iso-8859-1"
33 return orig_enc
34
35 # Copied from Python 3.2 tokenize
def detect_encoding(readline):
    """Work out which encoding should be used to decode a Python source file.

    Takes one argument, readline, a callable returning successive lines of
    the file as bytes (the same convention as the tokenize() generator).
    readline is called at most twice.

    Returns a tuple ``(encoding, lines)`` where ``encoding`` is a string and
    ``lines`` is the list of lines (still bytes) that were consumed. A
    leading UTF-8 BOM is removed from the first returned line.

    The encoding comes from a UTF-8 BOM or from a PEP 263 encoding cookie on
    one of the first two lines. When a BOM is present the result is
    'utf-8-sig'. A cookie naming an unknown codec, or a cookie that
    disagrees with a BOM, raises SyntaxError. With neither a BOM nor a
    cookie the default 'utf-8' is returned.
    """
    saw_bom = False
    fallback = 'utf-8'

    def next_line():
        # A readline built on an iterator signals end-of-input by raising.
        try:
            return readline()
        except StopIteration:
            return b''

    def cookie_encoding(raw):
        # Cookies are ASCII-only; a line that is not ASCII cannot hold one.
        try:
            text = raw.decode('ascii')
        except UnicodeDecodeError:
            return None

        found = cookie_re.findall(text)
        if not found:
            return None
        name = _get_normal_name(found[0])
        try:
            codec = lookup(name)
        except LookupError:
            # This behaviour mimics the Python interpreter
            raise SyntaxError("unknown encoding: " + name)

        if not saw_bom:
            return name
        if codec.name != 'utf-8':
            # This behaviour mimics the Python interpreter
            raise SyntaxError('encoding problem: utf-8')
        return name + '-sig'

    first = next_line()
    if first.startswith(BOM_UTF8):
        saw_bom = True
        first = first[3:]
        fallback = 'utf-8-sig'
    if not first:
        return fallback, []

    consumed = [first]
    declared = cookie_encoding(first)
    if declared:
        return declared, consumed

    second = next_line()
    if not second:
        return fallback, consumed

    consumed.append(second)
    return cookie_encoding(second) or fallback, consumed
106
107 try:
108 # Available in Python 3.2 and above.
109 from tokenize import open
110 except ImportError:
111 # Copied from Python 3.2 tokenize
def open(filename):
    """Open a Python source file read-only as text, decoding it correctly.

    The encoding is the one reported by detect_encoding() for the file.

    Parameters
    ----------
    filename : str
        Path of the file to open.

    Returns
    -------
    A line-buffered TextIOWrapper positioned at the start of the file, with
    its ``mode`` attribute set to 'r'.

    Raises
    ------
    SyntaxError
        Propagated from detect_encoding() when the encoding declaration is
        invalid. The underlying file is closed before the error propagates.
    """
    buffer = io.open(filename, 'rb')  # Tweaked to use io.open for Python 2
    try:
        encoding, _ = detect_encoding(buffer.readline)
        # Rewind so the text wrapper also sees the lines just inspected.
        buffer.seek(0)
        text = TextIOWrapper(buffer, encoding, line_buffering=True)
        text.mode = 'r'
    except BaseException:
        # Nobody else owns the raw handle yet, so release it before
        # re-raising rather than leaking the file descriptor.
        buffer.close()
        raise
    return text
122
123 def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
17 def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
124 """Converts a bytes string with python source code to unicode.
18 """Converts a bytes string with python source code to unicode.
125
19
126 Unicode strings are passed through unchanged. Byte strings are checked
20 Unicode strings are passed through unchanged. Byte strings are checked
127 for the python source file encoding cookie to determine encoding.
21 for the python source file encoding cookie to determine encoding.
128 txt can be either a bytes buffer or a string containing the source
22 txt can be either a bytes buffer or a string containing the source
129 code.
23 code.
130 """
24 """
131 if isinstance(txt, str):
25 if isinstance(txt, str):
132 return txt
26 return txt
133 if isinstance(txt, bytes):
27 if isinstance(txt, bytes):
134 buffer = BytesIO(txt)
28 buffer = BytesIO(txt)
135 else:
29 else:
136 buffer = txt
30 buffer = txt
137 try:
31 try:
138 encoding, _ = detect_encoding(buffer.readline)
32 encoding, _ = detect_encoding(buffer.readline)
139 except SyntaxError:
33 except SyntaxError:
140 encoding = "ascii"
34 encoding = "ascii"
141 buffer.seek(0)
35 buffer.seek(0)
142 text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
36 text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
143 text.mode = 'r'
37 text.mode = 'r'
144 if skip_encoding_cookie:
38 if skip_encoding_cookie:
145 return u"".join(strip_encoding_cookie(text))
39 return u"".join(strip_encoding_cookie(text))
146 else:
40 else:
147 return text.read()
41 return text.read()
148
42
149 def strip_encoding_cookie(filelike):
43 def strip_encoding_cookie(filelike):
150 """Generator to pull lines from a text-mode file, skipping the encoding
44 """Generator to pull lines from a text-mode file, skipping the encoding
151 cookie if it is found in the first two lines.
45 cookie if it is found in the first two lines.
152 """
46 """
153 it = iter(filelike)
47 it = iter(filelike)
154 try:
48 try:
155 first = next(it)
49 first = next(it)
156 if not cookie_comment_re.match(first):
50 if not cookie_comment_re.match(first):
157 yield first
51 yield first
158 second = next(it)
52 second = next(it)
159 if not cookie_comment_re.match(second):
53 if not cookie_comment_re.match(second):
160 yield second
54 yield second
161 except StopIteration:
55 except StopIteration:
162 return
56 return
163
57
164 for line in it:
58 for line in it:
165 yield line
59 yield line
166
60
167 def read_py_file(filename, skip_encoding_cookie=True):
61 def read_py_file(filename, skip_encoding_cookie=True):
168 """Read a Python file, using the encoding declared inside the file.
62 """Read a Python file, using the encoding declared inside the file.
169
63
170 Parameters
64 Parameters
171 ----------
65 ----------
172 filename : str
66 filename : str
173 The path to the file to read.
67 The path to the file to read.
174 skip_encoding_cookie : bool
68 skip_encoding_cookie : bool
175 If True (the default), and the encoding declaration is found in the first
69 If True (the default), and the encoding declaration is found in the first
176 two lines, that line will be excluded from the output - compiling a
70 two lines, that line will be excluded from the output - compiling a
177 unicode string with an encoding declaration is a SyntaxError in Python 2.
71 unicode string with an encoding declaration is a SyntaxError in Python 2.
178
72
179 Returns
73 Returns
180 -------
74 -------
181 A unicode string containing the contents of the file.
75 A unicode string containing the contents of the file.
182 """
76 """
183 with open(filename) as f: # the open function defined in this module.
77 with open(filename) as f: # the open function defined in this module.
184 if skip_encoding_cookie:
78 if skip_encoding_cookie:
185 return "".join(strip_encoding_cookie(f))
79 return "".join(strip_encoding_cookie(f))
186 else:
80 else:
187 return f.read()
81 return f.read()
188
82
189 def read_py_url(url, errors='replace', skip_encoding_cookie=True):
83 def read_py_url(url, errors='replace', skip_encoding_cookie=True):
190 """Read a Python file from a URL, using the encoding declared inside the file.
84 """Read a Python file from a URL, using the encoding declared inside the file.
191
85
192 Parameters
86 Parameters
193 ----------
87 ----------
194 url : str
88 url : str
195 The URL from which to fetch the file.
89 The URL from which to fetch the file.
196 errors : str
90 errors : str
197 How to handle decoding errors in the file. Options are the same as for
91 How to handle decoding errors in the file. Options are the same as for
198 bytes.decode(), but here 'replace' is the default.
92 bytes.decode(), but here 'replace' is the default.
199 skip_encoding_cookie : bool
93 skip_encoding_cookie : bool
200 If True (the default), and the encoding declaration is found in the first
94 If True (the default), and the encoding declaration is found in the first
201 two lines, that line will be excluded from the output - compiling a
95 two lines, that line will be excluded from the output - compiling a
202 unicode string with an encoding declaration is a SyntaxError in Python 2.
96 unicode string with an encoding declaration is a SyntaxError in Python 2.
203
97
204 Returns
98 Returns
205 -------
99 -------
206 A unicode string containing the contents of the file.
100 A unicode string containing the contents of the file.
207 """
101 """
208 # Deferred import for faster start
102 # Deferred import for faster start
209 try:
103 from urllib.request import urlopen
210 from urllib.request import urlopen # Py 3
211 except ImportError:
212 from urllib import urlopen
213 response = urlopen(url)
104 response = urlopen(url)
214 buffer = io.BytesIO(response.read())
105 buffer = io.BytesIO(response.read())
215 return source_to_unicode(buffer, errors, skip_encoding_cookie)
106 return source_to_unicode(buffer, errors, skip_encoding_cookie)
216
107
217 def _list_readline(x):
108 def _list_readline(x):
218 """Given a list, returns a readline() function that returns the next element
109 """Given a list, returns a readline() function that returns the next element
219 with each call.
110 with each call.
220 """
111 """
221 x = iter(x)
112 x = iter(x)
222 def readline():
113 def readline():
223 return next(x)
114 return next(x)
224 return readline
115 return readline
225
116
226 # Code for going between .py files and cached .pyc files ----------------------
117 # Code for going between .py files and cached .pyc files ----------------------
227
118 try:
228 try: # Python 3.2, see PEP 3147
119 from importlib.util import source_from_cache, cache_from_source
229 try:
120 except ImportError :
230 from importlib.util import source_from_cache, cache_from_source
121 ## deprecated since 3.4
231 except ImportError :
122 from imp import source_from_cache, cache_from_source
232 ## deprecated since 3.4
233 from imp import source_from_cache, cache_from_source
234 except ImportError:
235 # Python <= 3.1: .pyc files go next to .py
def source_from_cache(path):
    """Map a cached bytecode path back to its source path.

    Fallback for Python <= 3.1, where .pyc/.pyo files sit next to the .py
    file: the extension is swapped for '.py'.

    Raises ValueError if ``path`` does not end in '.pyc' or '.pyo'.
    """
    stem, suffix = os.path.splitext(path)
    if suffix in ('.pyc', '.pyo'):
        # Should we look for .pyw files?
        return stem + '.py'
    raise ValueError('Not a cached Python file extension', suffix)
242
def cache_from_source(path, debug_override=None):
    """Map a source path to the bytecode file that sits next to it.

    Fallback for Python <= 3.1, where cached files live beside the source.

    Parameters
    ----------
    path : str
        Path to the source file.
    debug_override : bool, optional
        If true the result ends in '.pyc', if false in '.pyo'. When None
        (the default) the interpreter's ``__debug__`` flag decides.

    Returns
    -------
    The source path with its extension replaced by '.pyc' or '.pyo'.
    """
    if debug_override is None:
        debug_override = __debug__
    basename, _ = os.path.splitext(path)
    # Parenthesised on purpose: without the parentheses the conditional
    # binds looser than ``+`` and the false branch returns just '.pyo'.
    return basename + ('.pyc' if debug_override else '.pyo')
General Comments 0
You need to be logged in to leave comments. Login now