##// END OF EJS Templates
Download HTML only as a link, not as a file
neko259 -
r1683:0b52d251 default
parent child Browse files
Show More
@@ -1,79 +1,95 b''
1 1 import os
2 2 import re
3 3
4 4 from django.core.files.uploadedfile import SimpleUploadedFile, \
5 5 TemporaryUploadedFile
6 6 from pytube import YouTube
7 7 import requests
8 8
9 9 from boards.utils import validate_file_size
10 10
# Format name passed to pytube's filter when choosing a YouTube stream.
YOUTUBE_VIDEO_FORMAT = 'webm'

# HTTP status code treated as a successful download.
HTTP_RESULT_OK = 200

# Names of the response headers inspected before downloading.
HEADER_CONTENT_LENGTH = 'content-length'
HEADER_CONTENT_TYPE = 'content-type'

# Number of bytes requested per chunk while streaming a download.
FILE_DOWNLOAD_CHUNK_BYTES = 200000

# Matches YouTube watch URLs and youtu.be short links. The dot in
# "youtu.be" is escaped so that it only matches a literal dot and not an
# arbitrary character.
YOUTUBE_URL = re.compile(
    r'https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)[-\w]+')

# Content types for which no file is downloaded (see NothingDownloader).
TYPE_URL_ONLY = (
    'application/xhtml+xml',
    'text/html',
)
26
22 27
class Downloader:
    """Generic HTTP downloader and base class for specialised downloaders.

    Subclasses override ``handles`` to claim a URL and ``download`` to fetch
    it. The generic implementation never claims a URL and serves as the
    fallback.
    """

    @staticmethod
    def handles(url: str) -> bool:
        """Return whether this downloader claims *url* (always ``False``)."""
        return False

    @staticmethod
    def download(url: str):
        """Fetch *url* into a ``TemporaryUploadedFile``.

        Returns the file when the server answers with HTTP 200, otherwise
        ``None``. Anything raised by ``validate_file_size`` propagates to
        the caller, as do ``KeyError`` for a missing content-type header and
        ``IndexError`` for a content type without a ``/``.
        """
        # NOTE(review): verify=False disables TLS certificate verification
        # for both requests below; kept as in the original, confirm this is
        # intended.

        # Verify content headers before transferring any body data.
        response_head = requests.head(url, verify=False)
        content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0]
        length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
        if length_header:
            validate_file_size(int(length_header))

        # Set a dummy file name that will be replaced anyway, just keep the
        # valid extension.
        filename = 'file.' + content_type.split('/')[1]

        response = requests.get(url, verify=False, stream=True)
        try:
            # Check the status first so an error page is neither streamed
            # nor written to a temporary file that nobody will use.
            if response.status_code != HTTP_RESULT_OK:
                return None

            file = TemporaryUploadedFile(filename, content_type, 0, None, None)
            try:
                # Download the file, stop if the size exceeds the limit. The
                # content-length header may be absent or wrong, so the size
                # is re-validated as the data arrives.
                size = 0
                for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
                    size += len(chunk)
                    validate_file_size(size)
                    file.write(chunk)
            except Exception:
                # Release the temporary file instead of leaving it behind
                # half-written.
                file.close()
                raise
            return file
        finally:
            # With stream=True the connection stays open until the response
            # is closed or fully consumed.
            response.close()
55 60
56 61
def download(url):
    """Download *url* with the first downloader that claims it.

    Direct subclasses of ``Downloader`` are asked in turn via ``handles``;
    when none of them accepts the URL the generic ``Downloader`` is used.
    Returns whatever the chosen downloader's ``download`` returns.
    """
    # The generator is lazy, so ``handles`` is only called until the first
    # subclass accepts the URL.
    chosen = next(
        (candidate for candidate in Downloader.__subclasses__()
         if candidate.handles(url)),
        Downloader,
    )
    return chosen.download(url)
64 69
65 70
class YouTubeDownloader(Downloader):
    """Downloader for YouTube links, resolved to a video URL through pytube."""

    @staticmethod
    def download(url: str):
        """Download the first ``YOUTUBE_VIDEO_FORMAT`` video found for *url*.

        Returns the result of ``Downloader.download`` for that video's URL,
        or ``None`` when pytube reports no video in the requested format.
        """
        yt = YouTube()
        yt.from_url(url)
        videos = yt.filter(YOUTUBE_VIDEO_FORMAT)
        if not videos:
            return None
        return Downloader.download(videos[0].url)

    @staticmethod
    def handles(url: str) -> bool:
        """Return ``True`` when *url* starts with a recognised YouTube link."""
        # ``match`` yields a match object or None; convert it so the result
        # agrees with the declared ``bool`` return type.
        return YOUTUBE_URL.match(url) is not None
79 84
85
class NothingDownloader(Downloader):
    """Downloader that claims URL-only content types and downloads nothing."""

    @staticmethod
    def handles(url: str) -> bool:
        """Return whether *url* serves one of the ``TYPE_URL_ONLY`` types.

        Issues a HEAD request and inspects the content-type header. A
        response without that header is not claimed.
        """
        response_head = requests.head(url, verify=False)
        # A missing header must not abort downloader selection with a
        # KeyError; treat it as "not a URL-only type".
        raw_type = response_head.headers.get(HEADER_CONTENT_TYPE, '')
        # Drop parameters such as "; charset=utf-8", then normalise
        # surrounding whitespace and case, since media types are
        # case-insensitive.
        content_type = raw_type.split(';')[0].strip().lower()
        return content_type in TYPE_URL_ONLY

    @staticmethod
    def download(url: str):
        """Return ``None``: nothing is downloaded for URL-only content."""
        return None
General Comments 0
You need to be logged in to leave comments. Login now