##// END OF EJS Templates
Specific order of downloaders ensures they will work as expected
neko259 -
r1811:c2aa90c2 default
parent child Browse files
Show More
@@ -1,95 +1,101 b''
1 import re
1 import re
2
2
3 import requests
3 import requests
4 from django.core.files.uploadedfile import TemporaryUploadedFile
4 from django.core.files.uploadedfile import TemporaryUploadedFile
5 from pytube import YouTube
5 from pytube import YouTube
6
6
7 from boards.utils import validate_file_size
7 from boards.utils import validate_file_size
8
8
# Format name handed to pytube's ``filter`` when choosing a YouTube stream.
YOUTUBE_VIDEO_FORMAT = 'webm'

HTTP_RESULT_OK = 200

HEADER_CONTENT_LENGTH = 'content-length'
HEADER_CONTENT_TYPE = 'content-type'

# Chunk size, in bytes, passed to ``iter_content`` while streaming a body.
FILE_DOWNLOAD_CHUNK_BYTES = 200000

# The dot in ``youtu.be`` is escaped so that it only matches a literal dot.
REGEX_YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)[-\w]+')
# The hash part accepts both letter cases: hex info-hashes are
# case-insensitive and base32 ones are normally written in upper case.
REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-zA-Z0-9]{20,50}.*')

# Content types for which no file is downloaded (the URL alone is kept).
TYPE_URL_ONLY = (
    'application/xhtml+xml',
    'text/html',
)
25
25
26
26
class Downloader:
    """Generic HTTP downloader that accepts any URL."""

    @staticmethod
    def handles(url: str) -> bool:
        """Return ``True``: the generic downloader accepts every URL."""
        return True

    @staticmethod
    def download(url: str):
        """Fetch *url* into a ``TemporaryUploadedFile``.

        A HEAD request is issued first to inspect the content type and the
        advertised length, then the body is streamed chunk by chunk.

        Returns the temporary file, or ``None`` when the content type is
        listed in ``TYPE_URL_ONLY`` or the GET request does not answer with
        ``HTTP_RESULT_OK``.

        ``validate_file_size`` is called on the advertised length and on the
        running total; presumably it raises when the limit is exceeded
        (confirm in ``boards.utils``). Any such error propagates after the
        partially written temporary file has been closed.
        """
        # NOTE(review): verify=False turns off TLS certificate verification
        # for both requests; confirm this is intentional.
        response_head = requests.head(url, verify=False)
        # NOTE(review): a response without a Content-Type header raises
        # KeyError here.
        raw_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0]
        # Media types are case-insensitive and may carry whitespace before
        # the ';' separator, so normalise before comparing.
        content_type = raw_type.strip().lower()
        if content_type in TYPE_URL_ONLY:
            return None

        length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
        if length_header:
            validate_file_size(int(length_header))

        response = requests.get(url, verify=False, stream=True)
        try:
            if response.status_code != HTTP_RESULT_OK:
                return None

            # Set a dummy file name that will be replaced anyway, just keep
            # the valid extension.
            filename = 'file.' + content_type.split('/')[1]
            file = TemporaryUploadedFile(filename, content_type, 0, None, None)
            try:
                # The Content-Length header may be absent or wrong, so the
                # running size is validated after every chunk.
                size = 0
                for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
                    size += len(chunk)
                    validate_file_size(size)
                    file.write(chunk)
            except Exception:
                # Do not leave a half-written temporary file behind.
                file.close()
                raise
            return file
        finally:
            # A streamed response keeps its connection until it is closed.
            response.close()
62
62
63
63
def download(url):
    """Download *url* with the first specific downloader that handles it.

    Each direct subclass of ``Downloader`` is asked in the order returned by
    ``Downloader.__subclasses__()``; when none of them handles the URL the
    generic ``Downloader.download`` is used.

    NOTE(review): this file also shows a later ``download`` built on an
    explicit ``DOWNLOADERS`` tuple; confirm which definition is meant to stay.
    """
    for downloader in Downloader.__subclasses__():
        if downloader.handles(url):
            return downloader.download(url)
    # None of the specific downloaders handles this URL, so use the
    # generic one.
    return Downloader.download(url)
71
72
class YouTubeDownloader(Downloader):
    """Downloader for URLs matching ``REGEX_YOUTUBE_URL``, resolved via pytube."""

    @staticmethod
    def download(url: str):
        """Download the first ``YOUTUBE_VIDEO_FORMAT`` stream of the video.

        Returns whatever ``Downloader.download`` returns for the stream URL,
        or ``None`` when pytube reports no stream in that format.
        """
        yt = YouTube()
        yt.from_url(url)
        videos = yt.filter(YOUTUBE_VIDEO_FORMAT)
        if not videos:
            return None
        # The actual transfer is delegated to the generic HTTP downloader.
        return Downloader.download(videos[0].url)

    @staticmethod
    def handles(url: str) -> bool:
        """Return ``True`` when *url* starts with a YouTube video link."""
        return REGEX_YOUTUBE_URL.match(url) is not None
86
77
87
78
class NothingDownloader(Downloader):
    """Claims URLs matching ``REGEX_MAGNET`` and downloads nothing for them."""

    @staticmethod
    def handles(url: str) -> bool:
        """Return ``True`` when *url* matches ``REGEX_MAGNET``."""
        # Compare against None so that a real bool is returned, as the
        # annotation promises, instead of a match object.
        return REGEX_MAGNET.match(url) is not None

    @staticmethod
    def download(url: str):
        """Return ``None``; no file is fetched for this kind of URL."""
        return None
87
88
# Downloaders are tried in this order. The generic ``Downloader`` is listed
# last so that the specific ones are asked before it.
DOWNLOADERS = (
    YouTubeDownloader,
    NothingDownloader,
    Downloader,
)


def download(url):
    """Download *url* with the first entry of ``DOWNLOADERS`` that handles it.

    Returns whatever the selected downloader's ``download`` returns.

    Raises:
        ValueError: if no downloader in ``DOWNLOADERS`` handles the URL.
    """
    for downloader in DOWNLOADERS:
        if downloader.handles(url):
            return downloader.download(url)
    # ValueError is a subclass of Exception, so callers that catch
    # Exception keep working.
    raise ValueError('No downloader supports this URL.')
101
General Comments 0
You need to be logged in to leave comments. Login now