##// END OF EJS Templates
remove python 2.7.x specific code
Srinivas Reddy Thatiparthy -
Show More
@@ -1,247 +1,122 b''
1 1 """
2 2 Tools to open .py files as Unicode, using the encoding specified within the file,
3 3 as per PEP 263.
4 4
5 5 Much of the code is taken from the tokenize module in Python 3.2.
6 6 """
7 7
8 8 import io
9 9 from io import TextIOWrapper, BytesIO
10 10 import os.path
11 11 import re
12
12 from tokenize import open, detect_encoding
13 13
14 14 cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)", re.UNICODE)
15 15 cookie_comment_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
16 16
17 try:
18 # Available in Python 3
19 from tokenize import detect_encoding
20 except ImportError:
21 from codecs import lookup, BOM_UTF8
22
23 # Copied from Python 3.2 tokenize
24 def _get_normal_name(orig_enc):
25 """Imitates get_normal_name in tokenizer.c."""
26 # Only care about the first 12 characters.
27 enc = orig_enc[:12].lower().replace("_", "-")
28 if enc == "utf-8" or enc.startswith("utf-8-"):
29 return "utf-8"
30 if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
31 enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
32 return "iso-8859-1"
33 return orig_enc
34
35 # Copied from Python 3.2 tokenize
36 def detect_encoding(readline):
37 """
38 The detect_encoding() function is used to detect the encoding that should
39 be used to decode a Python source file. It requires one argument, readline,
40 in the same way as the tokenize() generator.
41
42 It will call readline a maximum of twice, and return the encoding used
43 (as a string) and a list of any lines (left as bytes) it has read in.
44
45 It detects the encoding from the presence of a utf-8 bom or an encoding
46 cookie as specified in pep-0263. If both a bom and a cookie are present,
47 but disagree, a SyntaxError will be raised. If the encoding cookie is an
48 invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
49 'utf-8-sig' is returned.
50
51 If no encoding is specified, then the default of 'utf-8' will be returned.
52 """
53 bom_found = False
54 encoding = None
55 default = 'utf-8'
56 def read_or_stop():
57 try:
58 return readline()
59 except StopIteration:
60 return b''
61
62 def find_cookie(line):
63 try:
64 line_string = line.decode('ascii')
65 except UnicodeDecodeError:
66 return None
67
68 matches = cookie_re.findall(line_string)
69 if not matches:
70 return None
71 encoding = _get_normal_name(matches[0])
72 try:
73 codec = lookup(encoding)
74 except LookupError:
75 # This behaviour mimics the Python interpreter
76 raise SyntaxError("unknown encoding: " + encoding)
77
78 if bom_found:
79 if codec.name != 'utf-8':
80 # This behaviour mimics the Python interpreter
81 raise SyntaxError('encoding problem: utf-8')
82 encoding += '-sig'
83 return encoding
84
85 first = read_or_stop()
86 if first.startswith(BOM_UTF8):
87 bom_found = True
88 first = first[3:]
89 default = 'utf-8-sig'
90 if not first:
91 return default, []
92
93 encoding = find_cookie(first)
94 if encoding:
95 return encoding, [first]
96
97 second = read_or_stop()
98 if not second:
99 return default, [first]
100
101 encoding = find_cookie(second)
102 if encoding:
103 return encoding, [first, second]
104
105 return default, [first, second]
106
107 try:
108 # Available in Python 3.2 and above.
109 from tokenize import open
110 except ImportError:
111 # Copied from Python 3.2 tokenize
112 def open(filename):
113 """Open a file in read only mode using the encoding detected by
114 detect_encoding().
115 """
116 buffer = io.open(filename, 'rb') # Tweaked to use io.open for Python 2
117 encoding, lines = detect_encoding(buffer.readline)
118 buffer.seek(0)
119 text = TextIOWrapper(buffer, encoding, line_buffering=True)
120 text.mode = 'r'
121 return text
122
123 17 def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
124 18 """Converts a bytes string with python source code to unicode.
125 19
126 20 Unicode strings are passed through unchanged. Byte strings are checked
127 21 for the python source file encoding cookie to determine encoding.
128 22 txt can be either a bytes buffer or a string containing the source
129 23 code.
130 24 """
131 25 if isinstance(txt, str):
132 26 return txt
133 27 if isinstance(txt, bytes):
134 28 buffer = BytesIO(txt)
135 29 else:
136 30 buffer = txt
137 31 try:
138 32 encoding, _ = detect_encoding(buffer.readline)
139 33 except SyntaxError:
140 34 encoding = "ascii"
141 35 buffer.seek(0)
142 36 text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
143 37 text.mode = 'r'
144 38 if skip_encoding_cookie:
145 39 return u"".join(strip_encoding_cookie(text))
146 40 else:
147 41 return text.read()
148 42
149 43 def strip_encoding_cookie(filelike):
150 44 """Generator to pull lines from a text-mode file, skipping the encoding
151 45 cookie if it is found in the first two lines.
152 46 """
153 47 it = iter(filelike)
154 48 try:
155 49 first = next(it)
156 50 if not cookie_comment_re.match(first):
157 51 yield first
158 52 second = next(it)
159 53 if not cookie_comment_re.match(second):
160 54 yield second
161 55 except StopIteration:
162 56 return
163 57
164 58 for line in it:
165 59 yield line
166 60
167 61 def read_py_file(filename, skip_encoding_cookie=True):
168 62 """Read a Python file, using the encoding declared inside the file.
169 63
170 64 Parameters
171 65 ----------
172 66 filename : str
173 67 The path to the file to read.
174 68 skip_encoding_cookie : bool
175 69 If True (the default), and the encoding declaration is found in the first
176 70 two lines, that line will be excluded from the output - compiling a
177 71 unicode string with an encoding declaration is a SyntaxError in Python 2.
178 72
179 73 Returns
180 74 -------
181 75 A unicode string containing the contents of the file.
182 76 """
183 77 with open(filename) as f: # the open function defined in this module.
184 78 if skip_encoding_cookie:
185 79 return "".join(strip_encoding_cookie(f))
186 80 else:
187 81 return f.read()
188 82
189 83 def read_py_url(url, errors='replace', skip_encoding_cookie=True):
190 84 """Read a Python file from a URL, using the encoding declared inside the file.
191 85
192 86 Parameters
193 87 ----------
194 88 url : str
195 89 The URL from which to fetch the file.
196 90 errors : str
197 91 How to handle decoding errors in the file. Options are the same as for
198 92 bytes.decode(), but here 'replace' is the default.
199 93 skip_encoding_cookie : bool
200 94 If True (the default), and the encoding declaration is found in the first
201 95 two lines, that line will be excluded from the output - compiling a
202 96 unicode string with an encoding declaration is a SyntaxError in Python 2.
203 97
204 98 Returns
205 99 -------
206 100 A unicode string containing the contents of the file.
207 101 """
208 102 # Deferred import for faster start
209 try:
210 from urllib.request import urlopen # Py 3
211 except ImportError:
212 from urllib import urlopen
103 from urllib.request import urlopen
213 104 response = urlopen(url)
214 105 buffer = io.BytesIO(response.read())
215 106 return source_to_unicode(buffer, errors, skip_encoding_cookie)
216 107
217 108 def _list_readline(x):
218 109 """Given a list, returns a readline() function that returns the next element
219 110 with each call.
220 111 """
221 112 x = iter(x)
222 113 def readline():
223 114 return next(x)
224 115 return readline
225 116
226 117 # Code for going between .py files and cached .pyc files ----------------------
227
228 try: # Python 3.2, see PEP 3147
229 118 try:
230 119 from importlib.util import source_from_cache, cache_from_source
231 120 except ImportError :
232 121 ## deprecated since 3.4
233 122 from imp import source_from_cache, cache_from_source
234 except ImportError:
235 # Python <= 3.1: .pyc files go next to .py
236 def source_from_cache(path):
237 basename, ext = os.path.splitext(path)
238 if ext not in ('.pyc', '.pyo'):
239 raise ValueError('Not a cached Python file extension', ext)
240 # Should we look for .pyw files?
241 return basename + '.py'
242
243 def cache_from_source(path, debug_override=None):
244 if debug_override is None:
245 debug_override = __debug__
246 basename, ext = os.path.splitext(path)
247 return basename + '.pyc' if debug_override else '.pyo'
General Comments 0
You need to be logged in to leave comments. Login now