##// END OF EJS Templates
Merge pull request #12559 from rushabh-v/pathlib_op
Matthias Bussonnier -
r26099:fc4e583d merge
parent child Browse files
Show More
@@ -1,103 +1,105 b''
1 1 """
2 2 Tools to open .py files as Unicode, using the encoding specified within the file,
3 3 as per PEP 263.
4 4
5 5 Much of the code is taken from the tokenize module in Python 3.2.
6 6 """
7 7
8 8 import io
9 9 from io import TextIOWrapper, BytesIO
10 from pathlib import Path
10 11 import re
11 12 from tokenize import open, detect_encoding
12 13
# Matches a PEP 263 style encoding declaration ("coding:" / "coding=")
# anywhere in a line; group 1 is the declared encoding name.
cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)", re.UNICODE)
# Same declaration, but only when it appears inside a line that consists of
# optional leading whitespace followed by a ``#`` comment.
cookie_comment_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)
15 16
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Convert Python source code given as bytes into a unicode string.

    Unicode strings are passed through unchanged. Byte input is checked for
    the Python source file encoding cookie to determine the encoding.

    Parameters
    ----------
    txt : str, bytes-like object or binary file object
        The source code. A ``str`` is returned as is. ``bytes``,
        ``bytearray`` and ``memoryview`` are wrapped in a ``BytesIO``.
        Anything else is used directly as a binary buffer and must provide
        ``readline`` and ``seek``.
    errors : str
        Error handler passed to the decoder; 'replace' is the default.
    skip_encoding_cookie : bool
        If True (the default), an encoding declaration found in the first
        two lines is excluded from the output.

    Returns
    -------
    A unicode string containing the decoded source.

    Notes
    -----
    A buffer passed in by the caller is consumed and closed, because the
    ``TextIOWrapper`` created around it is used as a context manager.
    """
    if isinstance(txt, str):
        return txt
    if isinstance(txt, (bytes, bytearray, memoryview)):
        # Any in-memory bytes-like object gets a file-like wrapper so that
        # detect_encoding can read it line by line.
        buffer = BytesIO(txt)
    else:
        buffer = txt
    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        # No usable encoding could be determined; decode as ASCII and let
        # the `errors` handler deal with any non-ASCII bytes.
        encoding = "ascii"
    # detect_encoding consumed up to two lines; rewind before decoding.
    buffer.seek(0)
    with TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) as text:
        text.mode = 'r'
        if skip_encoding_cookie:
            return "".join(strip_encoding_cookie(text))
        return text.read()
41 42
def strip_encoding_cookie(filelike):
    """Generator yielding the lines of a text-mode file, leaving out any of
    the first two lines that is an encoding cookie comment.
    """
    lines = iter(filelike)
    # Only the first two lines are candidates for the encoding cookie.
    for _ in range(2):
        try:
            candidate = next(lines)
        except StopIteration:
            # Fewer than two lines: nothing more to yield.
            return
        if cookie_comment_re.match(candidate) is None:
            yield candidate

    # Everything after the second line is passed through untouched.
    yield from lines
59 60
def read_py_file(filename, skip_encoding_cookie=True):
    """Read a Python file, using the encoding declared inside the file.

    Parameters
    ----------
    filename : str or pathlib.Path
        The path to the file to read.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the first
        two lines, that line will be excluded from the output.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    filepath = Path(filename)
    # `open` is tokenize.open (imported at module level), not the builtin.
    with open(filepath) as f:
        if skip_encoding_cookie:
            return "".join(strip_encoding_cookie(f))
        return f.read()
80 82
def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, using the encoding declared inside the file.

    Parameters
    ----------
    url : str
        The URL from which to fetch the file.
    errors : str
        How to handle decoding errors in the file. Options are the same as for
        bytes.decode(), but here 'replace' is the default.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the first
        two lines, that line will be excluded from the output.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # Deferred import for faster start
    from urllib.request import urlopen
    # Use the response as a context manager so the connection is released
    # as soon as the payload has been read, even if reading fails.
    with urlopen(url) as response:
        buffer = io.BytesIO(response.read())
    return source_to_unicode(buffer, errors, skip_encoding_cookie)
General Comments 0
You need to be logged in to leave comments. Login now