##// END OF EJS Templates
use file.read instead of filepath.read_text
rushabh-v -
Show More
@@ -1,105 +1,105 b''
1 1 """
2 2 Tools to open .py files as Unicode, using the encoding specified within the file,
3 3 as per PEP 263.
4 4
5 5 Much of the code is taken from the tokenize module in Python 3.2.
6 6 """
7 7
8 8 import io
9 9 from io import TextIOWrapper, BytesIO
10 10 from pathlib import Path
11 11 import re
12 12 from tokenize import open, detect_encoding
13 13
# Matches an encoding declaration ("coding:" or "coding=" followed by a codec
# name) anywhere in a string; group 1 is the codec name.
cookie_re = re.compile(
    r"coding[:=]\s*([-\w.]+)",
    flags=re.UNICODE,
)
# Same declaration, but only when it sits inside a ``#`` comment that starts
# the line (optionally after leading whitespace); group 1 is the codec name.
cookie_comment_re = re.compile(
    r"^\s*#.*coding[:=]\s*([-\w.]+)",
    flags=re.UNICODE,
)
16 16
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Decode Python source given as bytes into a unicode string.

    A ``str`` argument is returned untouched. Otherwise the encoding is taken
    from the source itself via ``tokenize.detect_encoding``; if that raises
    ``SyntaxError``, ASCII is used instead.

    Parameters
    ----------
    txt : str, bytes or binary file-like
        The source code. A non-bytes, non-str object is used directly as the
        binary buffer, so it must provide ``readline`` and ``seek``; it is
        closed when decoding finishes.
    errors : str
        Decoding error handler handed to ``TextIOWrapper``.
    skip_encoding_cookie : bool
        If True (the default), the lines are passed through
        ``strip_encoding_cookie`` before being joined.

    Returns
    -------
    A unicode string containing the decoded source.
    """
    if isinstance(txt, str):
        return txt

    stream = BytesIO(txt) if isinstance(txt, bytes) else txt

    try:
        encoding = detect_encoding(stream.readline)[0]
    except SyntaxError:
        encoding = "ascii"

    # detect_encoding consumed up to two lines; rewind before decoding.
    stream.seek(0)
    with TextIOWrapper(stream, encoding, errors=errors, line_buffering=True) as decoded:
        decoded.mode = 'r'
        if not skip_encoding_cookie:
            return decoded.read()
        return "".join(strip_encoding_cookie(decoded))
42 42
def strip_encoding_cookie(filelike):
    """Yield the lines of a text-mode file, omitting encoding cookies.

    Only the first two lines are tested against ``cookie_comment_re``; any of
    them that matches is dropped. Every later line is yielded unchanged.
    """
    lines = iter(filelike)
    # An encoding declaration is only looked for on lines one and two.
    for _ in range(2):
        try:
            candidate = next(lines)
        except StopIteration:
            # Input ended within the first two lines: nothing left to emit.
            return
        if not cookie_comment_re.match(candidate):
            yield candidate

    yield from lines
60 60
def read_py_file(filename, skip_encoding_cookie=True):
    """Read a Python file, using the encoding declared inside the file.

    Parameters
    ----------
    filename : str or path-like
        The path to the file to read.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the first
        two lines, that line will be excluded from the output.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    filepath = Path(filename)
    # Use the ``open`` imported into this module from ``tokenize``, not
    # ``Path.open``: the latter decodes with the locale's default encoding
    # and ignores the encoding cookie / BOM, whereas ``tokenize.open``
    # detects the encoding from the file itself.
    with open(filepath) as f:
        if skip_encoding_cookie:
            return "".join(strip_encoding_cookie(f))
        return f.read()
82 82
def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, using the encoding declared inside the file.

    Parameters
    ----------
    url : str
        The URL from which to fetch the file.
    errors : str
        How to handle decoding errors in the file. Options are the same as for
        bytes.decode(), but here 'replace' is the default.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the first
        two lines, that line will be excluded from the output.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # Deferred import for faster start
    from urllib.request import urlopen

    # Read the whole payload inside a ``with`` block so the response (and its
    # underlying connection) is closed even if reading raises.
    with urlopen(url) as response:
        buffer = io.BytesIO(response.read())
    return source_to_unicode(buffer, errors, skip_encoding_cookie)
General Comments 0
You need to be logged in to leave comments. Login now