Show More
@@ -1,140 +1,141 b'' | |||
|
1 | 1 | # downloads.py - Code for downloading dependencies. |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | # no-check-code because Python 3 native. |
|
9 | 9 | |
|
10 | 10 | import gzip |
|
11 | 11 | import hashlib |
|
12 | 12 | import pathlib |
|
13 | import typing | |
|
13 | 14 | import urllib.request |
|
14 | 15 | |
|
15 | 16 | |
|
# Table of downloadable dependencies, keyed by a short name.
#
# Each entry records the source URL plus the expected byte size and
# SHA-256 hex digest used to verify the download. ``version`` is only
# present on some entries.
DOWNLOADS = {
    'gettext': {
        'url': 'https://versaweb.dl.sourceforge.net/project/gnuwin32/gettext/0.14.4/gettext-0.14.4-bin.zip',
        'size': 1606131,
        'sha256': '60b9ef26bc5cceef036f0424e542106cf158352b2677f43a01affd6d82a1d641',
        'version': '0.14.4',
    },
    'gettext-dep': {
        'url': 'https://versaweb.dl.sourceforge.net/project/gnuwin32/gettext/0.14.4/gettext-0.14.4-dep.zip',
        'size': 715086,
        'sha256': '411f94974492fd2ecf52590cb05b1023530aec67e64154a88b1e4ebcd9c28588',
    },
}
|
29 | 30 | |
|
30 | 31 | |
|
def hash_path(p: pathlib.Path):
    """Return the hex-encoded SHA-256 digest of the file at ``p``.

    The file is opened in binary mode and fed to the hasher in 64 KiB
    pieces rather than being read in one go.
    """
    digest = hashlib.sha256()

    with p.open('rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns
        # the empty bytes object that signals end-of-file.
        for block in iter(lambda: stream.read(65536), b''):
            digest.update(block)

    return digest.hexdigest()
|
43 | 44 | |
|
44 | 45 | |
|
class IntegrityError(Exception):
    """Raised when downloaded content fails its size or SHA-256 check."""
|
47 | 48 | |
|
48 | 49 | |
|
def secure_download_stream(url, size, sha256):
    """Securely download a URL to a stream of chunks.

    This is a generator: it yields the response body as ``bytes``
    chunks of at most 64 KiB while tracking the running length and
    SHA-256 of everything read.

    ``url`` is the location to fetch, ``size`` is the expected number
    of bytes and ``sha256`` is the expected hex digest.

    If the integrity of the download fails, an IntegrityError is
    raised. A body that grows beyond ``size`` is rejected as soon as
    the limit is crossed; a short body or a digest mismatch can only
    be detected after the final chunk has been yielded, so callers
    must not trust the data until the generator finishes cleanly.
    """
    h = hashlib.sha256()
    length = 0

    with urllib.request.urlopen(url) as response:
        fh = response
        decoder = None

        # Transparently undo transport-level gzip, but leave files that
        # are themselves .gz archives untouched.
        if (
            not url.endswith('.gz')
            and response.info().get('Content-Encoding') == 'gzip'
        ):
            decoder = gzip.GzipFile(fileobj=response)
            fh = decoder

        try:
            while True:
                chunk = fh.read(65536)
                if not chunk:
                    break

                h.update(chunk)
                length += len(chunk)

                # Fail fast instead of streaming an arbitrarily large
                # body from a misbehaving server to the consumer.
                if length > size:
                    raise IntegrityError(
                        'size mismatch on %s: wanted %d; got at least %d'
                        % (url, size, length)
                    )

                yield chunk
        finally:
            # GzipFile.close() does not close the underlying response;
            # the enclosing ``with`` takes care of that.
            if decoder is not None:
                decoder.close()

    digest = h.hexdigest()

    if length != size:
        raise IntegrityError(
            'size mismatch on %s: wanted %d; got %d' % (url, size, length)
        )

    if digest != sha256:
        raise IntegrityError(
            'sha256 mismatch on %s: wanted %s; got %s' % (url, sha256, digest)
        )
|
86 | 87 | |
|
87 | 88 | |
|
def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str):
    """Download a URL to a filesystem path, verifying size and SHA-256.

    If ``path`` already exists and matches both ``size`` and ``sha256``
    nothing is downloaded. An existing file that fails either check is
    deleted and fetched again.

    Any exception raised while downloading (including the
    IntegrityError raised by secure_download_stream) propagates to the
    caller after the temporary file has been removed.
    """
    print('downloading %s to %s' % (url, path))

    if path.exists():
        # Compare sizes first: it is cheap and lets us skip hashing a
        # file that is obviously wrong.
        if path.stat().st_size != size:
            print('existing file size is wrong; removing')
        elif hash_path(path) != sha256:
            print('existing file hash is wrong; removing')
        else:
            print('%s exists and passes integrity checks' % path)
            return

        path.unlink()

    # We download to a temporary file and rename at the end so there's
    # no chance of the final file being partially written or containing
    # bad data.
    tmp = path.with_name('%s.tmp' % path.name)

    try:
        with tmp.open('wb') as fh:
            for chunk in secure_download_stream(url, size, sha256):
                fh.write(chunk)
    except BaseException:
        # Clean up on every failure (network errors and Ctrl-C too),
        # not just integrity failures, so no stale .tmp is left behind.
        try:
            tmp.unlink()
        except FileNotFoundError:
            # The temp file was never created; keep the original error.
            pass
        raise

    # replace() overwrites an existing destination on every platform,
    # whereas rename() fails on Windows if the target exists.
    tmp.replace(path)
    print('successfully downloaded %s' % url)
|
126 | 127 | |
|
127 | 128 | |
|
def download_entry(
    name: str, dest_path: pathlib.Path, local_name=None
) -> typing.Tuple[pathlib.Path, typing.Dict[str, typing.Union[str, int]]]:
    """Fetch the DOWNLOADS entry called ``name`` into ``dest_path``.

    The file is stored under ``local_name`` inside ``dest_path``. When
    ``local_name`` is not given, the text after the last ``/`` of the
    entry's URL is used instead.

    Returns a 2-tuple of the local file path and the DOWNLOADS entry.
    """
    info = DOWNLOADS[name]
    source_url = info['url']

    if not local_name:
        # Fall back to the final path component of the URL.
        local_name = source_url[source_url.rindex('/') + 1 :]

    target = dest_path / local_name
    download_to_path(source_url, target, info['size'], info['sha256'])

    return target, info
General Comments 0
You need to be logged in to leave comments.
Login now