##// END OF EJS Templates
autoreformat with darker
Matthias Bussonnier -
Show More
@@ -1,105 +1,105 b''
1 """
1 """
2 Tools to open .py files as Unicode, using the encoding specified within the file,
2 Tools to open .py files as Unicode, using the encoding specified within the file,
3 as per PEP 263.
3 as per PEP 263.
4
4
5 Much of the code is taken from the tokenize module in Python 3.2.
5 Much of the code is taken from the tokenize module in Python 3.2.
6 """
6 """
7
7
8 import io
8 import io
9 from io import TextIOWrapper, BytesIO
9 from io import TextIOWrapper, BytesIO
10 from pathlib import Path
10 from pathlib import Path
11 import re
11 import re
12 from tokenize import open, detect_encoding
12 from tokenize import open, detect_encoding
13
13
# Matches a "coding:" / "coding=" declaration anywhere in a string and
# captures the encoding name that follows it.
14 cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)", re.UNICODE)
14 cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)", re.UNICODE)
# Same declaration, but only when it appears after a "#" on a line that
# starts with optional whitespace (i.e. inside a comment line).  Used by
# strip_encoding_cookie to recognise the line to drop.
15 cookie_comment_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
15 cookie_comment_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
16
16
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Convert Python source given as bytes into a unicode string.

    The encoding is the one reported by ``tokenize.detect_encoding`` for the
    source.  If detection raises ``SyntaxError`` (for example because the
    declared encoding is unknown), ASCII is used instead.

    Parameters
    ----------
    txt : str, bytes-like object or binary file-like object
        A ``str`` is returned unchanged.  ``bytes``, ``bytearray`` and
        ``memoryview`` objects are wrapped in a ``BytesIO``.  Anything else
        is used as a binary buffer directly and must provide ``readline``
        and ``seek``; it is rewound to position 0 and is closed when this
        function returns, because the text wrapper built around it is
        closed.
    errors : str
        How to handle decoding errors, as for ``bytes.decode()``; here
        'replace' is the default.
    skip_encoding_cookie : bool
        If True (the default), the lines are passed through
        ``strip_encoding_cookie`` so the encoding declaration is left out
        of the result.

    Returns
    -------
    A unicode string containing the decoded source.
    """
    if isinstance(txt, str):
        return txt
    # Accept every in-memory bytes-like type, not just ``bytes``; otherwise
    # a bytearray would be mistaken for a file object and fail on .readline.
    if isinstance(txt, (bytes, bytearray, memoryview)):
        buffer = BytesIO(txt)
    else:
        buffer = txt
    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        encoding = "ascii"
    # detect_encoding consumed up to two lines; start decoding from the top.
    buffer.seek(0)
    with TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) as text:
        # NOTE(review): nothing in this function reads ``mode``; presumably
        # kept so the wrapper looks like a file returned by open() -- confirm
        # before removing.
        text.mode = 'r'
        if skip_encoding_cookie:
            return "".join(strip_encoding_cookie(text))
        return text.read()
42
42
def strip_encoding_cookie(filelike):
    """Generator to pull lines from a text-mode file, skipping the encoding
    cookie if it is found in the first two lines.

    Only the first two lines are examined, and at most one line is dropped:
    once a cookie has been found on the first line, the second line is
    passed through untouched even if it is a comment that happens to contain
    ``coding: ...``.  All remaining lines are yielded unchanged.

    Parameters
    ----------
    filelike : iterable of str
        Typically a file opened in text mode; any iterable of lines works.

    Yields
    ------
    str
        Each line of the input, minus the encoding declaration line.
    """
    lines = iter(filelike)
    for _ in range(2):
        # Lines are always str, so None is a safe "exhausted" sentinel.
        line = next(lines, None)
        if line is None:
            return
        if cookie_comment_re.match(line):
            # Found the declaration: drop it and stop looking, since only
            # one encoding declaration can be in effect for a file.
            break
        yield line
    yield from lines
60
60
def read_py_file(filename, skip_encoding_cookie=True):
    """Read a Python file, using the encoding declared inside the file.

    Parameters
    ----------
    filename : str
        The path to the file to read.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the
        first two lines, that line will be excluded from the output.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # ``open`` is the one imported from ``tokenize`` at the top of this
    # module, not the builtin.
    with open(Path(filename)) as source:
        if not skip_encoding_cookie:
            return source.read()
        return "".join(strip_encoding_cookie(source))
82
82
def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, using the encoding declared inside the file.

    Parameters
    ----------
    url : str
        The URL from which to fetch the file.
    errors : str
        How to handle decoding errors in the file. Options are the same as for
        bytes.decode(), but here 'replace' is the default.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the first
        two lines, that line will be excluded from the output.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # Deferred import for faster start
    from urllib.request import urlopen

    # Close the response as soon as the payload has been read, so the
    # underlying connection is not left open until garbage collection.
    with urlopen(url) as response:
        payload = response.read()
    return source_to_unicode(io.BytesIO(payload), errors, skip_encoding_cookie)
General Comments 0
You need to be logged in to leave comments. Login now