##// END OF EJS Templates
Download HTML only as a link, not as a file
neko259 -
r1683:0b52d251 default
parent child Browse files
Show More
@@ -1,79 +1,95 b''
1 import os
1 import os
2 import re
2 import re
3
3
4 from django.core.files.uploadedfile import SimpleUploadedFile, \
4 from django.core.files.uploadedfile import SimpleUploadedFile, \
5 TemporaryUploadedFile
5 TemporaryUploadedFile
6 from pytube import YouTube
6 from pytube import YouTube
7 import requests
7 import requests
8
8
9 from boards.utils import validate_file_size
9 from boards.utils import validate_file_size
10
10
# Container format passed to pytube's filter() when picking a YouTube stream.
YOUTUBE_VIDEO_FORMAT = 'webm'

# Only responses with this status code are returned as a downloaded file.
HTTP_RESULT_OK = 200

# Names of the response headers inspected before downloading.
HEADER_CONTENT_LENGTH = 'content-length'
HEADER_CONTENT_TYPE = 'content-type'

# Chunk size, in bytes, passed to iter_content() while streaming a download.
FILE_DOWNLOAD_CHUNK_BYTES = 200000

# Matches YouTube watch-page links and youtu.be short links.  Every literal
# dot is escaped; an unescaped "." would also accept look-alike hosts such
# as "youtuXbe".
YOUTUBE_URL = re.compile(
    r'https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)[-\w]+')

# Content types that NothingDownloader claims so that no file is fetched
# for them.
TYPE_URL_ONLY = (
    'application/xhtml+xml',
    'text/html',
)
26
22
27
class Downloader:
    """Generic HTTP downloader that stores the response body in a temp file.

    Subclasses override handles() to claim particular URLs and download()
    to change how those URLs are fetched.
    """

    @staticmethod
    def handles(url: str) -> bool:
        """Return whether this downloader claims *url*.

        The generic downloader never claims a URL.
        """
        return False

    @staticmethod
    def download(url: str):
        """Download *url* into a TemporaryUploadedFile.

        The announced Content-Length (when present) and the running total
        of received bytes are both passed to validate_file_size(); anything
        it raises propagates to the caller.

        Returns the written file, or None when the GET response status is
        not HTTP_RESULT_OK.
        """
        # NOTE(review): verify=False disables TLS certificate verification
        # for both requests below; confirm this is intentional.

        # Verify content headers
        response_head = requests.head(url, verify=False)
        # Fall back to a generic binary type when the server sends no
        # Content-Type header instead of failing with KeyError.
        raw_type = response_head.headers.get(
            HEADER_CONTENT_TYPE, 'application/octet-stream')
        content_type = raw_type.split(';')[0].strip()
        length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
        if length_header:
            validate_file_size(int(length_header))

        # Set a dummy file name that will be replaced anyway, just keep the
        # valid extension. A type without a subtype gets a neutral one.
        type_parts = content_type.split('/')
        if len(type_parts) > 1 and type_parts[1]:
            extension = type_parts[1]
        else:
            extension = 'bin'
        filename = 'file.' + extension

        response = requests.get(url, verify=False, stream=True)
        try:
            # Check the status before reading the body so that an error page
            # is never streamed to disk.
            if response.status_code != HTTP_RESULT_OK:
                return None

            file = TemporaryUploadedFile(filename, content_type, 0, None, None)
            try:
                # Download file, stop if the size exceeds limit
                size = 0
                for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
                    size += len(chunk)
                    validate_file_size(size)
                    file.write(chunk)
            except Exception:
                # Do not leave a half-written temporary file open.
                file.close()
                raise
            return file
        finally:
            # A streamed response keeps its connection until it is closed.
            response.close()
55
60
56
61
def download(url):
    """Fetch *url* with the first specialised downloader that claims it.

    Direct subclasses of Downloader are asked in turn through handles();
    the first one that accepts the URL performs the download.  When none
    of them does, the generic Downloader is used.
    """
    specialised = next(
        (candidate for candidate in Downloader.__subclasses__()
         if candidate.handles(url)),
        None,
    )
    chosen = Downloader if specialised is None else specialised
    return chosen.download(url)
64
69
65
70
class YouTubeDownloader(Downloader):
    """Downloader for URLs that match YOUTUBE_URL."""

    @staticmethod
    def download(url: str):
        """Download the video behind a YouTube *url*.

        Asks pytube for the entries matching YOUTUBE_VIDEO_FORMAT and hands
        the URL of the first one to the generic Downloader.

        Returns whatever Downloader.download() returns, or None when no
        entry matches the format.
        """
        yt = YouTube()
        yt.from_url(url)
        videos = yt.filter(YOUTUBE_VIDEO_FORMAT)
        if not videos:
            return None
        return Downloader.download(videos[0].url)

    @staticmethod
    def handles(url: str) -> bool:
        """Return True when *url* starts with a YouTube video link."""
        # match() yields a Match object or None; convert it so the result
        # agrees with the declared bool return type.
        return YOUTUBE_URL.match(url) is not None
79
84
85
class NothingDownloader(Downloader):
    """Downloader that claims HTML pages and deliberately fetches nothing."""

    @staticmethod
    def handles(url: str) -> bool:
        """Return True when a HEAD request reports a type in TYPE_URL_ONLY.

        A response without a Content-Type header is not claimed.
        """
        # NOTE(review): verify=False disables TLS certificate verification;
        # confirm this is intentional.
        response_head = requests.head(url, verify=False)
        raw_type = response_head.headers.get(HEADER_CONTENT_TYPE, '')
        # Drop parameters such as "; charset=utf-8" and normalise case and
        # whitespace before comparing with the lower-case constants.
        content_type = raw_type.split(';')[0].strip().lower()
        return content_type in TYPE_URL_ONLY

    @staticmethod
    def download(url: str):
        """Return None: no file is produced for URLs this class claims."""
        return None
General Comments 0
You need to be logged in to leave comments. Login now