Merge pull request #10131 from srinivasreddy/deprecate_openpy...
Thomas Kluyver
r23076:f948ff0d merge
@@ -9,117 +9,11 @@ import io
 from io import TextIOWrapper, BytesIO
 import os.path
 import re
-
+from tokenize import open, detect_encoding
 
 cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)", re.UNICODE)
 cookie_comment_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
 
-try:
-    # Available in Python 3
-    from tokenize import detect_encoding
-except ImportError:
-    from codecs import lookup, BOM_UTF8
-
-    # Copied from Python 3.2 tokenize
-    def _get_normal_name(orig_enc):
-        """Imitates get_normal_name in tokenizer.c."""
-        # Only care about the first 12 characters.
-        enc = orig_enc[:12].lower().replace("_", "-")
-        if enc == "utf-8" or enc.startswith("utf-8-"):
-            return "utf-8"
-        if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
-           enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
-            return "iso-8859-1"
-        return orig_enc
-
-    # Copied from Python 3.2 tokenize
-    def detect_encoding(readline):
-        """
-        The detect_encoding() function is used to detect the encoding that should
-        be used to decode a Python source file. It requires one argment, readline,
-        in the same way as the tokenize() generator.
-
-        It will call readline a maximum of twice, and return the encoding used
-        (as a string) and a list of any lines (left as bytes) it has read in.
-
-        It detects the encoding from the presence of a utf-8 bom or an encoding
-        cookie as specified in pep-0263. If both a bom and a cookie are present,
-        but disagree, a SyntaxError will be raised. If the encoding cookie is an
-        invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
-        'utf-8-sig' is returned.
-
-        If no encoding is specified, then the default of 'utf-8' will be returned.
-        """
-        bom_found = False
-        encoding = None
-        default = 'utf-8'
-        def read_or_stop():
-            try:
-                return readline()
-            except StopIteration:
-                return b''
-
-        def find_cookie(line):
-            try:
-                line_string = line.decode('ascii')
-            except UnicodeDecodeError:
-                return None
-
-            matches = cookie_re.findall(line_string)
-            if not matches:
-                return None
-            encoding = _get_normal_name(matches[0])
-            try:
-                codec = lookup(encoding)
-            except LookupError:
-                # This behaviour mimics the Python interpreter
-                raise SyntaxError("unknown encoding: " + encoding)
-
-            if bom_found:
-                if codec.name != 'utf-8':
-                    # This behaviour mimics the Python interpreter
-                    raise SyntaxError('encoding problem: utf-8')
-                encoding += '-sig'
-            return encoding
-
-        first = read_or_stop()
-        if first.startswith(BOM_UTF8):
-            bom_found = True
-            first = first[3:]
-            default = 'utf-8-sig'
-        if not first:
-            return default, []
-
-        encoding = find_cookie(first)
-        if encoding:
-            return encoding, [first]
-
-        second = read_or_stop()
-        if not second:
-            return default, [first]
-
-        encoding = find_cookie(second)
-        if encoding:
-            return encoding, [first, second]
-
-        return default, [first, second]
-
-try:
-    # Available in Python 3.2 and above.
-    from tokenize import open
-except ImportError:
-    # Copied from Python 3.2 tokenize
-    def open(filename):
-        """Open a file in read only mode using the encoding detected by
-        detect_encoding().
-        """
-        buffer = io.open(filename, 'rb')  # Tweaked to use io.open for Python 2
-        encoding, lines = detect_encoding(buffer.readline)
-        buffer.seek(0)
-        text = TextIOWrapper(buffer, encoding, line_buffering=True)
-        text.mode = 'r'
-        return text
-
 def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
     """Converts a bytes string with python source code to unicode.
 
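The removed fallback reimplemented what the Python 3 standard library already provides ("Copied from Python 3.2 tokenize"). As a rough sketch of the two tokenize helpers the new import pulls in, not part of the changeset and with a hypothetical file name example.py:

import tokenize

with open('example.py', 'rb') as f:
    # Reads at most two lines and returns (encoding, list_of_bytes_lines_read).
    encoding, lines = tokenize.detect_encoding(f.readline)
print(encoding)  # e.g. 'utf-8', 'utf-8-sig', or a PEP 263 cookie value

# tokenize.open() runs the same detection and returns a read-only text file object.
with tokenize.open('example.py') as f:
    source = f.read()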
@@ -206,10 +100,7 @@ def read_py_url(url, errors='replace', skip_encoding_cookie=True):
     A unicode string containing the contents of the file.
     """
     # Deferred import for faster start
-    try:
-        from urllib.request import urlopen # Py 3
-    except ImportError:
-        from urllib import urlopen
+    from urllib.request import urlopen
     response = urlopen(url)
     buffer = io.BytesIO(response.read())
     return source_to_unicode(buffer, errors, skip_encoding_cookie)
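With Python 2 support dropped, read_py_url() can import urlopen from urllib.request unconditionally. A minimal sketch of the same pattern, assuming a hypothetical URL:

import io
from urllib.request import urlopen

response = urlopen('https://example.com/some_module.py')  # hypothetical URL
buffer = io.BytesIO(response.read())  # seekable bytes buffer over the response body
# buffer is then handed to source_to_unicode(), as in the unchanged lines above.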
@@ -224,24 +115,8 @@ def _list_readline(x):
     return readline
 
 # Code for going between .py files and cached .pyc files ----------------------
-
-try: # Python 3.2, see PEP 3147
-    try:
-        from importlib.util import source_from_cache, cache_from_source
-    except ImportError :
-        ## deprecated since 3.4
-        from imp import source_from_cache, cache_from_source
-except ImportError:
-    # Python <= 3.1: .pyc files go next to .py
-    def source_from_cache(path):
-        basename, ext = os.path.splitext(path)
-        if ext not in ('.pyc', '.pyo'):
-            raise ValueError('Not a cached Python file extension', ext)
-        # Should we look for .pyw files?
-        return basename + '.py'
-
-    def cache_from_source(path, debug_override=None):
-        if debug_override is None:
-            debug_override = __debug__
-        basename, ext = os.path.splitext(path)
-        return basename + '.pyc' if debug_override else '.pyo'
+try:
+    from importlib.util import source_from_cache, cache_from_source
+except ImportError :
+    ## deprecated since 3.4
+    from imp import source_from_cache, cache_from_source
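The hand-written Python <= 3.1 fallbacks are gone; only the importlib.util (or, on older interpreters, imp) helpers remain. A short sketch of what those stdlib functions do, with hypothetical paths:

from importlib.util import cache_from_source, source_from_cache

pyc = cache_from_source('pkg/module.py')
# PEP 3147 layout, e.g. 'pkg/__pycache__/module.cpython-36.pyc' (tag varies by interpreter).

py = source_from_cache(pyc)
# Recovers 'pkg/module.py'; raises ValueError if the path is not a PEP 3147 cache path.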