Show More
@@ -1,140 +1,141 b'' | |||||
1 | # downloads.py - Code for downloading dependencies. |
|
1 | # downloads.py - Code for downloading dependencies. | |
2 | # |
|
2 | # | |
3 | # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com> |
|
3 | # Copyright 2019 Gregory Szorc <gregory.szorc@gmail.com> | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | # no-check-code because Python 3 native. |
|
8 | # no-check-code because Python 3 native. | |
9 |
|
9 | |||
10 | import gzip |
|
10 | import gzip | |
11 | import hashlib |
|
11 | import hashlib | |
12 | import pathlib |
|
12 | import pathlib | |
|
13 | import typing | |||
13 | import urllib.request |
|
14 | import urllib.request | |
14 |
|
15 | |||
15 |
|
16 | |||
# Registry of downloadable dependencies, keyed by a short entry name.
#
# Each entry carries the source ``url`` plus the expected ``size`` (bytes)
# and ``sha256`` (hex digest) that the download code checks the fetched
# file against. ``version`` is only present on some entries.
DOWNLOADS = {
    'gettext': {
        'url': 'https://versaweb.dl.sourceforge.net/project/gnuwin32/gettext/0.14.4/gettext-0.14.4-bin.zip',
        'size': 1606131,
        'sha256': '60b9ef26bc5cceef036f0424e542106cf158352b2677f43a01affd6d82a1d641',
        'version': '0.14.4',
    },
    'gettext-dep': {
        'url': 'https://versaweb.dl.sourceforge.net/project/gnuwin32/gettext/0.14.4/gettext-0.14.4-dep.zip',
        'size': 715086,
        'sha256': '411f94974492fd2ecf52590cb05b1023530aec67e64154a88b1e4ebcd9c28588',
    },
}
29 |
|
30 | |||
30 |
|
31 | |||
def hash_path(p: pathlib.Path) -> str:
    """Return the hex SHA-256 digest of the file at ``p``.

    The file is read in 64 KiB chunks so that large files are never held
    in memory in full.
    """
    h = hashlib.sha256()

    with p.open('rb') as fh:
        # Two-argument iter() keeps calling read() until it returns the
        # empty-bytes sentinel that marks end of file.
        for chunk in iter(lambda: fh.read(65536), b''):
            h.update(chunk)

    return h.hexdigest()
43 |
|
44 | |||
44 |
|
45 | |||
class IntegrityError(Exception):
    """Represents an integrity error when downloading a URL."""
47 |
|
48 | |||
48 |
|
49 | |||
def secure_download_stream(url: str, size: int, sha256: str):
    """Securely download a URL to a stream of chunks.

    This is a generator. Chunks are yielded as they arrive, and the
    integrity checks can only run once the whole body has been read, so a
    consumer must drain the generator completely and discard whatever it
    has written if ``IntegrityError`` is raised.

    Args:
        url: URL to fetch, opened with ``urllib.request.urlopen``.
        size: Expected total number of bytes yielded.
        sha256: Expected hex SHA-256 digest of the yielded bytes.

    Yields:
        ``bytes`` chunks of at most 65536 bytes each.

    Raises:
        IntegrityError: After the final chunk, if the total length differs
            from ``size`` or the digest differs from ``sha256``. The size
            check runs first.
    """
    h = hashlib.sha256()
    length = 0

    with urllib.request.urlopen(url) as fh:
        # If the response is gzip content-encoded, decompress it so the
        # size and digest are computed over the decoded payload. A URL
        # that itself ends in ``.gz`` is left as-is.
        if (
            not url.endswith('.gz')
            and fh.info().get('Content-Encoding') == 'gzip'
        ):
            fh = gzip.GzipFile(fileobj=fh)

        while True:
            chunk = fh.read(65536)
            if not chunk:
                break

            h.update(chunk)
            length += len(chunk)

            yield chunk

    digest = h.hexdigest()

    if length != size:
        raise IntegrityError(
            'size mismatch on %s: wanted %d; got %d' % (url, size, length)
        )

    if digest != sha256:
        raise IntegrityError(
            'sha256 mismatch on %s: wanted %s; got %s' % (url, sha256, digest)
        )
86 |
|
87 | |||
87 |
|
88 | |||
def download_to_path(url: str, path: pathlib.Path, size: int, sha256: str):
    """Download a URL to a filesystem path, verifying size and SHA-256.

    If ``path`` already exists and matches both ``size`` and ``sha256``,
    nothing is downloaded. An existing file that fails either check is
    deleted and fetched again.

    Args:
        url: URL to download.
        path: Final destination of the downloaded file.
        size: Expected file size in bytes.
        sha256: Expected hex SHA-256 digest of the file.

    Raises:
        IntegrityError: If the downloaded content does not match ``size``
            or ``sha256``. Any other exception raised while downloading
            propagates unchanged. In both cases the temporary file is
            removed first.
    """
    print('downloading %s to %s' % (url, path))

    if path.exists():
        good = True

        if path.stat().st_size != size:
            print('existing file size is wrong; removing')
            good = False

        # Only pay for hashing when the cheap size check has passed.
        if good:
            if hash_path(path) != sha256:
                print('existing file hash is wrong; removing')
                good = False

        if good:
            print('%s exists and passes integrity checks' % path)
            return

        path.unlink()

    # We download to a temporary file and rename at the end so there's
    # no chance of the final file being partially written or containing
    # bad data.
    tmp = path.with_name('%s.tmp' % path.name)

    try:
        with tmp.open('wb') as fh:
            for chunk in secure_download_stream(url, size, sha256):
                fh.write(chunk)
    except BaseException:
        # Remove the partial file whatever interrupted the download
        # (integrity failure, network error, Ctrl-C), then re-raise. The
        # existence check covers the case where opening ``tmp`` itself
        # failed and there is nothing to delete.
        if tmp.exists():
            tmp.unlink()
        raise

    tmp.rename(path)
    print('successfully downloaded %s' % url)
126 |
|
127 | |||
127 |
|
128 | |||
def download_entry(
    name: str, dest_path: pathlib.Path, local_name: typing.Optional[str] = None
) -> typing.Tuple[pathlib.Path, typing.Dict[str, typing.Union[str, int]]]:
    """Download the ``DOWNLOADS`` entry called ``name`` into ``dest_path``.

    Args:
        name: Key into ``DOWNLOADS``.
        dest_path: Directory the file is placed in.
        local_name: File name to use inside ``dest_path``. When omitted (or
            empty), the last path component of the entry's URL is used.

    Returns:
        A ``(local_path, entry)`` tuple: the path of the downloaded file
        and the ``DOWNLOADS`` entry that describes it.

    Raises:
        KeyError: If ``name`` is not in ``DOWNLOADS``.
    """
    entry = DOWNLOADS[name]

    url = entry['url']

    # Default to whatever follows the final '/' of the URL.
    local_name = local_name or url[url.rindex('/') + 1 :]

    local_path = dest_path / local_name
    download_to_path(url, local_path, entry['size'], entry['sha256'])

    return local_path, entry
General Comments 0
You need to be logged in to leave comments.
Login now