##// END OF EJS Templates
Merge pull request #11093 from subhrm/11068-B...
Thomas Kluyver -
r24299:a041d240 merge
parent child Browse files
Show More
@@ -1,121 +1,117
"""
Tools to open .py files as Unicode, using the encoding specified within the file,
as per PEP 263.

Much of the code is taken from the tokenize module in Python 3.2.
"""
7
7
8 import io
8 import io
9 from io import TextIOWrapper, BytesIO
9 from io import TextIOWrapper, BytesIO
10 import re
10 import re
11 from tokenize import open, detect_encoding
11 from tokenize import open, detect_encoding
12
12
# Matches an encoding declaration ("coding:" or "coding=" followed by a codec
# name) anywhere in a string; group 1 is the codec name.
cookie_re = re.compile(
    r"coding[:=]\s*([-\w.]+)",
    re.UNICODE,
)
# Same declaration, but only when it appears inside a comment that starts the
# line (optionally preceded by whitespace); group 1 is the codec name.
cookie_comment_re = re.compile(
    r"^\s*#.*coding[:=]\s*([-\w.]+)",
    re.UNICODE,
)
15
15
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Converts a bytes string with python source code to unicode.

    Unicode strings are passed through unchanged. Byte strings are checked
    for the python source file encoding cookie to determine encoding.

    Parameters
    ----------
    txt : str, bytes or binary file-like object
        The source code. A file-like object must provide ``readline`` and
        ``seek``; it is rewound to the start and is closed by the time this
        function returns.
    errors : str
        How to handle decoding errors; passed to ``TextIOWrapper``.
    skip_encoding_cookie : bool
        If True (the default), the lines are passed through
        ``strip_encoding_cookie`` so an encoding declaration in the first
        two lines is left out of the result.

    Returns
    -------
    A unicode string containing the decoded source.
    """
    if isinstance(txt, str):
        return txt
    if isinstance(txt, bytes):
        buffer = BytesIO(txt)
    else:
        buffer = txt
    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        # detect_encoding could not produce a usable encoding; fall back to
        # ascii and let `errors` decide what happens to undecodable bytes.
        encoding = "ascii"
    # detect_encoding consumed up to two lines; decode from the beginning.
    buffer.seek(0)
    # Close the wrapper explicitly instead of leaving it to garbage
    # collection. Closing the wrapper also closes the underlying buffer.
    with TextIOWrapper(buffer, encoding, errors=errors,
                       line_buffering=True) as text:
        # Kept from the original implementation; presumably for consumers
        # that inspect ``.mode`` on the stream - TODO confirm.
        text.mode = 'r'
        if skip_encoding_cookie:
            return u"".join(strip_encoding_cookie(text))
        return text.read()
41
41
def strip_encoding_cookie(filelike):
    """Generator yielding the lines of a text-mode file, leaving out any of
    the first two lines that is an encoding cookie comment.

    A line counts as a cookie when it matches ``cookie_comment_re``. Lines
    after the second are yielded unchanged. Input shorter than two lines
    simply ends the generator early.
    """
    lines = iter(filelike)
    # Only the first two lines are candidates for the encoding declaration.
    for _ in range(2):
        try:
            candidate = next(lines)
        except StopIteration:
            return
        if cookie_comment_re.match(candidate) is None:
            yield candidate

    # Everything else passes straight through.
    for remaining in lines:
        yield remaining
59
59
def read_py_file(filename, skip_encoding_cookie=True):
    """Read a Python file, using the encoding declared inside the file.

    Parameters
    ----------
    filename : str
        The path to the file to read.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the
        first two lines, that line will be excluded from the output -
        compiling a unicode string with an encoding declaration is a
        SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # `open` is tokenize.open (imported at module level), not the builtin.
    with open(filename) as source:
        if not skip_encoding_cookie:
            return source.read()
        return "".join(strip_encoding_cookie(source))
81
81
def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, using the encoding declared inside the file.

    Parameters
    ----------
    url : str
        The URL from which to fetch the file.
    errors : str
        How to handle decoding errors in the file. Options are the same as for
        bytes.decode(), but here 'replace' is the default.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the first
        two lines, that line will be excluded from the output - compiling a
        unicode string with an encoding declaration is a SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # Deferred import for faster start
    from urllib.request import urlopen
    # Close the response as soon as the body has been read, rather than
    # leaving the connection open until garbage collection.
    with urlopen(url) as response:
        buffer = io.BytesIO(response.read())
    return source_to_unicode(buffer, errors, skip_encoding_cookie)
106
106
107 def _list_readline(x):
107 def _list_readline(x):
108 """Given a list, returns a readline() function that returns the next element
108 """Given a list, returns a readline() function that returns the next element
109 with each call.
109 with each call.
110 """
110 """
111 x = iter(x)
111 x = iter(x)
112 def readline():
112 def readline():
113 return next(x)
113 return next(x)
114 return readline
114 return readline
115
115
116 # Code for going between .py files and cached .pyc files ----------------------
116 # Code for going between .py files and cached .pyc files ----------------------
117 try:
117 from importlib.util import source_from_cache, cache_from_source
118 from importlib.util import source_from_cache, cache_from_source
119 except ImportError :
120 ## deprecated since 3.4
121 from imp import source_from_cache, cache_from_source
General Comments 0
You need to be logged in to leave comments. Login now