Show More
@@ -1,79 +1,95 b'' | |||||
1 | import os |
|
1 | import os | |
2 | import re |
|
2 | import re | |
3 |
|
3 | |||
4 | from django.core.files.uploadedfile import SimpleUploadedFile, \ |
|
4 | from django.core.files.uploadedfile import SimpleUploadedFile, \ | |
5 | TemporaryUploadedFile |
|
5 | TemporaryUploadedFile | |
6 | from pytube import YouTube |
|
6 | from pytube import YouTube | |
7 | import requests |
|
7 | import requests | |
8 |
|
8 | |||
9 | from boards.utils import validate_file_size |
|
9 | from boards.utils import validate_file_size | |
10 |
|
10 | |||
# Format name passed to the pytube stream filter when picking a video.
YOUTUBE_VIDEO_FORMAT = 'webm'

HTTP_RESULT_OK = 200

# Header names used when inspecting a response.
HEADER_CONTENT_LENGTH = 'content-length'
HEADER_CONTENT_TYPE = 'content-type'

# Chunk size used when streaming a response body to a temporary file.
FILE_DOWNLOAD_CHUNK_BYTES = 200000

# Matches "youtube.com/watch?v=<id>" (with optional "www.") and
# "youtu.be/<id>" links. Every literal dot is escaped so that look-alike
# hosts such as "youtuXbe" are not accepted.
YOUTUBE_URL = re.compile(
    r'https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)[-\w]+')

# Content types that denote a web page: such URLs are kept as links only
# and nothing is downloaded for them.
TYPE_URL_ONLY = (
    'application/xhtml+xml',
    'text/html',
)
|
26 | ||||
22 |
|
27 | |||
class Downloader:
    """Generic HTTP downloader and base class for specialised downloaders.

    Subclasses override ``handles`` to claim the URLs they understand and
    ``download`` to fetch them.
    """

    @staticmethod
    def handles(url: str) -> bool:
        """Return whether this downloader claims *url*.

        The generic downloader never claims a URL.
        """
        return False

    @staticmethod
    def download(url: str):
        """Fetch *url* into a temporary uploaded file.

        The advertised content length (when present) and the running total
        of received bytes are both passed to ``validate_file_size``;
        whatever it raises propagates to the caller.

        Returns the ``TemporaryUploadedFile`` when the server answers with
        HTTP 200, otherwise ``None``.
        """
        # NOTE(review): verify=False disables TLS certificate validation for
        # both requests; kept as in the original, confirm it is intended.
        # HEAD does not follow redirects by default while GET does, so
        # follow them here too: otherwise the headers describe the redirect
        # response rather than the content that is actually downloaded.
        response_head = requests.head(url, verify=False, allow_redirects=True)

        # Tolerate a missing or empty content type instead of raising
        # KeyError; fall back to the generic binary media type.
        raw_type = response_head.headers.get(HEADER_CONTENT_TYPE, '')
        content_type = raw_type.split(';')[0].strip() \
            or 'application/octet-stream'

        length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
        if length_header:
            validate_file_size(int(length_header))

        response = requests.get(url, verify=False, stream=True)
        try:
            # Check the status before reading the body so that an error
            # page is neither downloaded nor left behind in a temp file.
            if response.status_code != HTTP_RESULT_OK:
                return None

            # Set a dummy file name that will be replaced anyway, just
            # keep the valid extension. A type without a "/" is used
            # as the extension as a whole.
            type_parts = content_type.split('/')
            extension = type_parts[1] if len(type_parts) > 1 else type_parts[0]
            filename = 'file.' + extension

            file = TemporaryUploadedFile(filename, content_type, 0, None, None)
            size = 0
            try:
                # Download file, stop if the size exceeds limit
                for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
                    size += len(chunk)
                    validate_file_size(size)
                    file.write(chunk)
            except Exception:
                # Release the temporary file before propagating the error.
                file.close()
                raise
            return file
        finally:
            # A streamed response holds its connection until it is closed.
            response.close()
55 |
|
60 | |||
56 |
|
61 | |||
def download(url):
    """Fetch *url* with the first specialised downloader that claims it.

    The direct subclasses of ``Downloader`` are asked, in the order
    ``Downloader.__subclasses__()`` yields them, whether they handle the
    URL. The first one that does performs the download; if none does, the
    generic ``Downloader`` is used instead.
    """
    chosen = next(
        (candidate for candidate in Downloader.__subclasses__()
         if candidate.handles(url)),
        Downloader,
    )
    return chosen.download(url)
64 |
|
69 | |||
65 |
|
70 | |||
class YouTubeDownloader(Downloader):
    """Downloader for YouTube links, resolved to a stream URL via pytube."""

    @staticmethod
    def download(url: str):
        """Download the first ``YOUTUBE_VIDEO_FORMAT`` stream of *url*.

        Returns the result of ``Downloader.download`` for the stream URL,
        or ``None`` when the filter yields no video in that format.
        """
        yt = YouTube()
        yt.from_url(url)
        videos = yt.filter(YOUTUBE_VIDEO_FORMAT)
        if len(videos) == 0:
            return None
        return Downloader.download(videos[0].url)

    @staticmethod
    def handles(url: str) -> bool:
        """Return ``True`` when *url* starts with a YouTube video link."""
        # re.match returns a match object or None; convert explicitly so
        # the declared bool return type holds.
        return YOUTUBE_URL.match(url) is not None
79 |
|
84 | |||
|
85 | ||||
|
class NothingDownloader(Downloader):
    """Claims URLs whose content type is in ``TYPE_URL_ONLY``.

    For such URLs ``download`` returns ``None``, i.e. no file is fetched.
    """

    @staticmethod
    def handles(url: str) -> bool:
        """Return ``True`` when a HEAD request reports a URL-only type."""
        # NOTE(review): verify=False disables TLS certificate validation;
        # kept as in the original, confirm it is intended.
        # HEAD does not follow redirects by default, so ask for it
        # explicitly to inspect the headers of the final target.
        response_head = requests.head(url, verify=False, allow_redirects=True)
        # A missing header must not raise here; it simply means the URL is
        # not claimed. Media types are compared case-insensitively.
        raw_type = response_head.headers.get(HEADER_CONTENT_TYPE, '')
        content_type = raw_type.split(';')[0].strip().lower()
        return content_type in TYPE_URL_ONLY

    @staticmethod
    def download(url: str):
        """Download nothing and return ``None``."""
        return None
General Comments 0
You need to be logged in to leave comments.
Login now