Show More
@@ -1,95 +1,101 b'' | |||||
1 | import re |
|
1 | import re | |
2 |
|
2 | |||
3 | import requests |
|
3 | import requests | |
4 | from django.core.files.uploadedfile import TemporaryUploadedFile |
|
4 | from django.core.files.uploadedfile import TemporaryUploadedFile | |
5 | from pytube import YouTube |
|
5 | from pytube import YouTube | |
6 |
|
6 | |||
7 | from boards.utils import validate_file_size |
|
7 | from boards.utils import validate_file_size | |
8 |
|
8 | |||
# Container format requested when picking a YouTube stream.
YOUTUBE_VIDEO_FORMAT = 'webm'

# Only responses with this status code are downloaded.
HTTP_RESULT_OK = 200

# Response header names (requests header lookup is case-insensitive).
HEADER_CONTENT_LENGTH = 'content-length'
HEADER_CONTENT_TYPE = 'content-type'

# Chunk size, in bytes, used when streaming a response body.
FILE_DOWNLOAD_CHUNK_BYTES = 200000

# The dot in "youtu.be" is escaped so that only the real short-link host
# matches (an unescaped dot would also accept e.g. "youtuXbe").
REGEX_YOUTUBE_URL = re.compile(
    r'https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)[-\w]+')
# Info-hashes are commonly written in upper case (hex or base32), so the
# match is case-insensitive.
REGEX_MAGNET = re.compile(
    r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*', re.IGNORECASE)

# Content types for which nothing is downloaded.
TYPE_URL_ONLY = (
    'application/xhtml+xml',
    'text/html',
)
25 |
|
25 | |||
26 |
|
26 | |||
class Downloader:
    """Generic HTTP downloader that accepts any URL."""

    @staticmethod
    def handles(url: str) -> bool:
        """Return True: the generic downloader accepts every URL."""
        return True

    @staticmethod
    def download(url: str):
        """Fetch ``url`` into a temporary uploaded file.

        A HEAD request is made first to inspect the content type and the
        declared length; the body is then streamed in chunks and the
        running size is passed to ``validate_file_size`` after each chunk.

        Returns the ``TemporaryUploadedFile`` holding the body, or ``None``
        when the content type is listed in ``TYPE_URL_ONLY`` or the GET
        response status is not ``HTTP_RESULT_OK``.

        Whatever ``validate_file_size`` raises is propagated (presumably
        a validation error when the limit is exceeded -- confirm against
        ``boards.utils``).
        """
        # NOTE(security): verify=False disables TLS certificate checks for
        # URLs that come from outside. Kept as-is because it looks
        # deliberate; confirm before enabling verification.
        # NOTE(review): requests.head() does not follow redirects by
        # default, so these headers describe the first response only.
        response_head = requests.head(url, verify=False)

        # A missing Content-Type is treated as a generic binary payload
        # instead of failing with KeyError. Media types are
        # case-insensitive, so normalise before comparing.
        raw_type = response_head.headers.get(
            HEADER_CONTENT_TYPE, 'application/octet-stream')
        content_type = raw_type.split(';')[0].strip().lower()
        if content_type in TYPE_URL_ONLY:
            return None

        # Reject early when the server declares a size.
        length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
        if length_header:
            validate_file_size(int(length_header))

        response = requests.get(url, verify=False, stream=True)
        try:
            if response.status_code != HTTP_RESULT_OK:
                return None

            # Set a dummy file name that will be replaced anyway, just
            # keep the valid extension. Fall back to the whole value when
            # the type has no "/" instead of raising IndexError.
            type_parts = content_type.split('/')
            extension = type_parts[1] if len(type_parts) > 1 else type_parts[0]
            filename = 'file.' + extension

            file = TemporaryUploadedFile(filename, content_type, 0, None, None)
            try:
                # The declared length may be absent or wrong, so the real
                # size is re-validated after every chunk.
                size = 0
                for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
                    size += len(chunk)
                    validate_file_size(size)
                    file.write(chunk)
            except Exception:
                # Do not leave an orphaned temporary file behind.
                file.close()
                raise
            return file
        finally:
            # Release the streamed connection on every exit path.
            response.close()
62 |
|
62 | |||
63 |
|
63 | |||
def download(url):
    """Download ``url`` with the first matching specialised downloader.

    Direct subclasses of ``Downloader`` are asked in turn whether they
    handle the URL; if none of them does, the generic ``Downloader`` is
    used.
    """
    chosen = next(
        (candidate for candidate in Downloader.__subclasses__()
         if candidate.handles(url)),
        Downloader,
    )
    return chosen.download(url)
|
|||
71 |
|
||||
72 |
|
||||
class YouTubeDownloader(Downloader):
    """Downloader for YouTube watch URLs and youtu.be short links."""

    @staticmethod
    def download(url: str):
        """Download the first stream in ``YOUTUBE_VIDEO_FORMAT``.

        Returns whatever ``Downloader.download`` returns for the stream
        URL, or ``None`` when no stream in that format is available.
        """
        tube = YouTube()
        tube.from_url(url)
        candidates = tube.filter(YOUTUBE_VIDEO_FORMAT)
        if len(candidates) == 0:
            return None
        # Delegate the actual transfer to the generic downloader.
        return Downloader.download(candidates[0].url)

    @staticmethod
    def handles(url: str) -> bool:
        """Return True when ``url`` starts with a recognised YouTube link."""
        matched = REGEX_YOUTUBE_URL.match(url)
        return matched is not None
86 |
|
77 | |||
87 |
|
78 | |||
class NothingDownloader(Downloader):
    """Downloader for magnet links, which have nothing to fetch."""

    @staticmethod
    def handles(url: str) -> bool:
        """Return True when ``url`` is a magnet link."""
        # Compare against None so the result is a real bool, as the
        # annotation promises and as YouTubeDownloader.handles does.
        return REGEX_MAGNET.match(url) is not None

    @staticmethod
    def download(url: str):
        """Return ``None``: no file is downloaded for a magnet link."""
        return None
|
87 | ||||
|
88 | ||||
|
# Downloaders in the order download() tries them. The generic Downloader
# must stay last: its handles() returns True for every URL, so anything
# listed after it would never be reached.
DOWNLOADERS = (
    YouTubeDownloader,
    NothingDownloader,
    Downloader,
)
|
94 | ||||
|
95 | ||||
|
def download(url):
    """Download ``url`` with the first entry of ``DOWNLOADERS`` that handles it.

    Returns the result of that downloader's ``download`` call.

    Raises:
        Exception: if no registered downloader handles the URL.
    """
    for candidate in DOWNLOADERS:
        if not candidate.handles(url):
            continue
        return candidate.download(url)
    raise Exception('No downloader supports this URL.')
|
101 |
General Comments 0
You need to be logged in to leave comments.
Login now