##// END OF EJS Templates
Download YouTube links as videos instead of always treating them as links to HTML content
neko259 -
r1685:0fb0af80 default
parent child Browse files
Show More
@@ -1,95 +1,95 b''
1 import os
1 import os
2 import re
2 import re
3
3
4 from django.core.files.uploadedfile import SimpleUploadedFile, \
4 from django.core.files.uploadedfile import SimpleUploadedFile, \
5 TemporaryUploadedFile
5 TemporaryUploadedFile
6 from pytube import YouTube
6 from pytube import YouTube
7 import requests
7 import requests
8
8
9 from boards.utils import validate_file_size
9 from boards.utils import validate_file_size
10
10
# Format name passed to the pytube filter when choosing a video stream.
YOUTUBE_VIDEO_FORMAT = 'webm'

# The only HTTP status for which a downloaded file is returned.
HTTP_RESULT_OK = 200

# Response header names read from the HEAD request.
HEADER_CONTENT_LENGTH = 'content-length'
HEADER_CONTENT_TYPE = 'content-type'

# Chunk size, in bytes, used while streaming a response body to disk.
FILE_DOWNLOAD_CHUNK_BYTES = 200000

# Matches YouTube watch links and youtu.be short links. The dot in
# "youtu.be" is escaped so that look-alike hosts such as "youtuXbe" are
# not mistaken for YouTube.
YOUTUBE_URL = re.compile(
    r'https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)[-\w]+')

# Content types that are kept as a plain URL instead of being downloaded.
TYPE_URL_ONLY = (
    'application/xhtml+xml',
    'text/html',
)
26
26
27
27
class Downloader:
    """Generic downloader that streams a URL into a temporary upload file.

    Subclasses override ``handles`` to claim the URLs they are responsible
    for and ``download`` to change how the content is obtained.
    """

    # Seconds to wait on the remote host (connecting, and between received
    # bytes) so that a stalled server cannot block the caller forever.
    REQUEST_TIMEOUT_SECONDS = 30

    # Used when the server does not send a Content-Type header at all.
    DEFAULT_CONTENT_TYPE = 'application/octet-stream'

    @staticmethod
    def handles(url: str) -> bool:
        """Return False; the generic downloader never claims a URL itself."""
        return False

    @staticmethod
    def download(url: str):
        """Download ``url`` and return it as a ``TemporaryUploadedFile``.

        A HEAD request is made first to read the content type and, when the
        server reports a content length, to validate the size before any
        data is transferred. The body is then streamed in chunks and the
        accumulated size is validated after every chunk.

        Returns the temporary file when the GET response status is
        ``HTTP_RESULT_OK``, otherwise ``None``. Whatever
        ``validate_file_size`` raises is propagated; the temporary file is
        closed before the exception leaves this method.
        """
        timeout = Downloader.REQUEST_TIMEOUT_SECONDS

        # NOTE(review): verify=False disables TLS certificate checks. It is
        # kept to preserve existing behaviour, but should be reconsidered.
        response_head = requests.head(url, verify=False, timeout=timeout)

        # Drop parameters such as "; charset=utf-8" and fall back to a
        # generic type when the header is missing or empty.
        raw_content_type = response_head.headers.get(HEADER_CONTENT_TYPE, '')
        content_type = (raw_content_type.split(';')[0].strip()
                        or Downloader.DEFAULT_CONTENT_TYPE)

        length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
        if length_header:
            validate_file_size(int(length_header))

        response = requests.get(url, verify=False, stream=True,
                                timeout=timeout)
        try:
            # Check the status before transferring the body, so a failed
            # request neither downloads data nor leaves a temp file behind.
            if response.status_code != HTTP_RESULT_OK:
                return None

            # Set a dummy file name that will be replaced anyway, just keep
            # a valid extension derived from the content subtype.
            type_parts = content_type.split('/')
            extension = type_parts[1] if len(type_parts) > 1 else type_parts[0]
            filename = 'file.' + extension

            temp_file = TemporaryUploadedFile(filename, content_type, 0,
                                              None, None)
            try:
                # Stop as soon as the accumulated size exceeds the limit.
                size = 0
                for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
                    size += len(chunk)
                    validate_file_size(size)
                    temp_file.write(chunk)
            except Exception:
                # Do not leak the temporary file when the download fails.
                temp_file.close()
                raise

            return temp_file
        finally:
            # A streamed response holds its connection until it is closed.
            response.close()
60
60
61
61
def download(url):
    """Download ``url`` with the first downloader that claims it.

    The direct subclasses of ``Downloader`` are asked in turn whether they
    handle the URL; the first one that does performs the download. When no
    specific downloader claims the URL, the generic ``Downloader`` is used.
    """
    chosen = next(
        (candidate for candidate in Downloader.__subclasses__()
         if candidate.handles(url)),
        Downloader,
    )
    return chosen.download(url)
69
69
70
70
class YouTubeDownloader(Downloader):
    """Downloader that fetches the video behind a YouTube link."""

    @staticmethod
    def download(url: str):
        """Download the first ``YOUTUBE_VIDEO_FORMAT`` video for ``url``.

        Returns whatever ``Downloader.download`` returns for the video's
        direct URL, or ``None`` when no video in that format is available.
        """
        yt = YouTube()
        yt.from_url(url)
        videos = yt.filter(YOUTUBE_VIDEO_FORMAT)
        if not videos:
            return None
        return Downloader.download(videos[0].url)

    @staticmethod
    def handles(url: str) -> bool:
        """Return True when ``url`` starts with a YouTube video link."""
        # Convert the match object to a real bool to honour the annotation.
        return YOUTUBE_URL.match(url) is not None
84
84
85
85
class NothingDownloader(Downloader):
    """Downloader for URLs that must be kept as links, not downloaded."""

    # Seconds to wait for the HEAD response so a stalled host cannot block
    # the caller forever.
    _HEAD_TIMEOUT_SECONDS = 30

    @staticmethod
    def handles(url: str) -> bool:
        """Return True when ``url`` serves a type listed in TYPE_URL_ONLY.

        YouTube links are never claimed here, even though they are served
        as HTML pages.
        """
        # Cheap local check first: no request is needed for YouTube links.
        if YOUTUBE_URL.match(url):
            return False

        # NOTE(review): verify=False disables TLS certificate checks. It is
        # kept to preserve existing behaviour, but should be reconsidered.
        response_head = requests.head(
            url, verify=False,
            timeout=NothingDownloader._HEAD_TIMEOUT_SECONDS)

        # A missing Content-Type header is treated as "not URL-only".
        raw_content_type = response_head.headers.get(HEADER_CONTENT_TYPE, '')
        content_type = raw_content_type.split(';')[0].strip()
        return content_type in TYPE_URL_ONLY

    @staticmethod
    def download(url: str):
        """Return None; nothing is downloaded for URLs claimed here."""
        return None
General Comments 0
You need to be logged in to leave comments. Login now