##// END OF EJS Templates
Properly close some leaked resources.
Matthias Bussonnier -
Show More
@@ -1,105 +1,105 b''
1 1 """
2 2 Tools to open .py files as Unicode, using the encoding specified within the file,
3 3 as per PEP 263.
4 4
5 5 Much of the code is taken from the tokenize module in Python 3.2.
6 6 """
7 7
8 8 import io
9 9 from io import TextIOWrapper, BytesIO
10 10 import re
11 11 from tokenize import open, detect_encoding
12 12
# Matches a PEP 263 style encoding declaration ("coding:" or "coding=")
# anywhere in a string; group 1 is the encoding name.
# NOTE(review): not referenced in the visible part of this file; kept because
# other modules may import it.
cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)", re.UNICODE)
# Same declaration, but only when it sits inside a comment that starts the
# line (optionally preceded by whitespace). Used to recognise cookie lines.
cookie_comment_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
15 15
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Convert Python source code to a unicode string.

    Unicode strings are passed through unchanged. Byte input is decoded with
    the encoding reported by ``tokenize.detect_encoding`` (which looks for the
    PEP 263 encoding cookie).

    Parameters
    ----------
    txt : str, bytes, or binary file-like object
        The source code. A file-like object must provide ``readline`` and
        ``seek``. It is rewound to position 0 before decoding and is closed,
        together with the text wrapper built around it, when this function
        returns.
    errors : str
        How to handle decoding errors; forwarded to ``TextIOWrapper``.
    skip_encoding_cookie : bool
        If True (the default), lines are passed through
        ``strip_encoding_cookie`` so that an encoding declaration in the
        first two lines is left out of the result.

    Returns
    -------
    A unicode string containing the decoded source.
    """
    if isinstance(txt, str):
        return txt

    buffer = BytesIO(txt) if isinstance(txt, bytes) else txt

    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        # detect_encoding rejected the source; fall back to ascii and let
        # the `errors` handler deal with any byte that does not fit.
        encoding = "ascii"
    # detect_encoding consumed the leading line(s); start again from the top.
    buffer.seek(0)

    # The context manager guarantees the wrapper (and the buffer under it) is
    # closed instead of being left for the garbage collector.
    with TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) as text:
        text.mode = 'r'
        if skip_encoding_cookie:
            return "".join(strip_encoding_cookie(text))
        return text.read()
41 41
def strip_encoding_cookie(filelike):
    """Generator to pull lines from a text-mode file, skipping the encoding
    cookie if it is found in the first two lines.

    Parameters
    ----------
    filelike : iterable of str
        Anything that yields text lines when iterated, e.g. a text-mode file.

    Yields
    ------
    Each line of the input in order, except that any of the first two lines
    matching ``cookie_comment_re`` is dropped.
    """
    lines = iter(filelike)

    # Only the first two lines are inspected for an encoding declaration.
    for _ in range(2):
        try:
            line = next(lines)
        except StopIteration:
            # Input had fewer than two lines; nothing more to emit.
            return
        if not cookie_comment_re.match(line):
            yield line

    # Everything after the first two lines is passed through untouched.
    for line in lines:
        yield line
59 59
def read_py_file(filename, skip_encoding_cookie=True):
    """Read a Python file, using the encoding declared inside the file.

    Parameters
    ----------
    filename : str
        The path to the file to read.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the first
        two lines, that line will be excluded from the output - compiling a
        unicode string with an encoding declaration is a SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # `open` here is tokenize.open (imported at the top of this module), not
    # the builtin: it opens the file in text mode with the detected encoding.
    with open(filename) as f:
        if skip_encoding_cookie:
            return "".join(strip_encoding_cookie(f))
        return f.read()
81 81
def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, using the encoding declared inside the file.

    Parameters
    ----------
    url : str
        The URL from which to fetch the file.
    errors : str
        How to handle decoding errors in the file. Options are the same as for
        bytes.decode(), but here 'replace' is the default.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the first
        two lines, that line will be excluded from the output - compiling a
        unicode string with an encoding declaration is a SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # Deferred import for faster start
    from urllib.request import urlopen

    # Read the whole payload, then release the response right away so it is
    # not left open until garbage collection.
    with urlopen(url) as response:
        buffer = io.BytesIO(response.read())
    return source_to_unicode(buffer, errors, skip_encoding_cookie)
General Comments 0
You need to be logged in to leave comments. Login now