# HG changeset patch # User neko259 # Date 2015-09-01 20:00:11 # Node ID 3352da826c8c7db295338f04c515569a9ceef7df # Parent f297fac219060763a939092328e9fdbef4425bc4 Download webm videos from youtube diff --git a/boards/forms.py b/boards/forms.py --- a/boards/forms.py +++ b/boards/forms.py @@ -8,18 +8,16 @@ from django.core.files.uploadedfile impo from django.core.exceptions import ObjectDoesNotExist from django.forms.util import ErrorList from django.utils.translation import ugettext_lazy as _ -import requests from boards.mdx_neboard import formatters +from boards.models.attachment.downloaders import Downloader from boards.models.post import TITLE_MAX_LENGTH from boards.models import Tag, Post +from boards.utils import validate_file_size from neboard import settings import boards.settings as board_settings import neboard -HEADER_CONTENT_LENGTH = 'content-length' -HEADER_CONTENT_TYPE = 'content-type' - REGEX_TAGS = re.compile(r'^[\w\s\d]+$', re.UNICODE) VETERAN_POSTING_DELAY = 5 @@ -41,10 +39,6 @@ ERROR_SPEED = _('Please wait %s seconds TAG_MAX_LENGTH = 20 -FILE_DOWNLOAD_CHUNK_BYTES = 100000 - -HTTP_RESULT_OK = 200 - TEXTAREA_ROWS = 4 @@ -182,7 +176,7 @@ class PostForm(NeboardForm): file = self.cleaned_data['file'] if file: - self.validate_file_size(file.size) + validate_file_size(file.size) return file @@ -196,7 +190,7 @@ class PostForm(NeboardForm): if not file: raise forms.ValidationError(_('Invalid URL')) else: - self.validate_file_size(file.size) + validate_file_size(file.size) return file @@ -294,13 +288,6 @@ class PostForm(NeboardForm): if can_post: self.session[LAST_POST_TIME] = now - def validate_file_size(self, size: int): - max_size = board_settings.get_int('Forms', 'MaxFileSize') - if size > max_size: - raise forms.ValidationError( - _('File must be less than %s bytes') - % str(max_size)) - def _get_file_from_url(self, url: str) -> SimpleUploadedFile: """ Gets an file file from URL. @@ -309,36 +296,18 @@ class PostForm(NeboardForm): img_temp = None try: - # Verify content headers - response_head = requests.head(url, verify=False) - content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0] - length_header = response_head.headers.get(HEADER_CONTENT_LENGTH) - if length_header: - length = int(length_header) - self.validate_file_size(length) - # Get the actual content into memory - response = requests.get(url, verify=False, stream=True) - - # Download file, stop if the size exceeds limit - size = 0 - content = b'' - for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES): - size += len(chunk) - self.validate_file_size(size) - content += chunk - - if response.status_code == HTTP_RESULT_OK and content: - # Set a dummy file name that will be replaced - # anyway, just keep the valid extension - filename = 'file.' + content_type.split('/')[1] - img_temp = SimpleUploadedFile(filename, content, - content_type) + for downloader in Downloader.__subclasses__(): + if downloader.handles(url): + return downloader.download(url) + # If nobody of the specific downloaders handles this, use generic + # one + return Downloader.download(url) + except forms.ValidationError as e: + raise e except Exception as e: # Just return no file pass - return img_temp - class ThreadForm(PostForm): diff --git a/boards/models/attachment/downloaders.py b/boards/models/attachment/downloaders.py new file mode 100644 --- /dev/null +++ b/boards/models/attachment/downloaders.py @@ -0,0 +1,83 @@ +import os +import re + +from django.core.files.uploadedfile import SimpleUploadedFile +from pytube import YouTube +import requests + +from boards.utils import validate_file_size + +YOUTUBE_VIDEO_FORMAT = 'webm' + +HTTP_RESULT_OK = 200 + +HEADER_CONTENT_LENGTH = 'content-length' +HEADER_CONTENT_TYPE = 'content-type' + +FILE_DOWNLOAD_CHUNK_BYTES = 100000 + +YOUTUBE_URL = re.compile(r'https?://www\.youtube\.com/watch\?v=\w+') + + +class Downloader: + @staticmethod + def handles(url: str) -> bool: + return False + + @staticmethod + def download(url: str): + # Verify content headers + response_head = requests.head(url, verify=False) + content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0] + length_header = response_head.headers.get(HEADER_CONTENT_LENGTH) + if length_header: + length = int(length_header) + validate_file_size(length) + # Get the actual content into memory + response = requests.get(url, verify=False, stream=True) + + # Download file, stop if the size exceeds limit + size = 0 + content = b'' + for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES): + size += len(chunk) + validate_file_size(size) + content += chunk + + if response.status_code == HTTP_RESULT_OK and content: + # Set a dummy file name that will be replaced + # anyway, just keep the valid extension + filename = 'file.' + content_type.split('/')[1] + return SimpleUploadedFile(filename, content, content_type) + + +class YouTubeDownloader(Downloader): + @staticmethod + def download(url: str): + yt = YouTube() + yt.from_url(url) + videos = yt.filter(YOUTUBE_VIDEO_FORMAT) + if len(videos) > 0: + video = videos[0] + filename = '{}.{}'.format(video.filename, video.extension) + try: + video.download(on_progress=YouTubeDownloader.on_progress) + + file = open(filename, 'rb') + content = file.read() + file.close() + + os.remove(filename) + return SimpleUploadedFile(filename, content, video.extension) + except Exception as e: + if os.path.isfile(filename): + os.remove(filename) + raise e + + @staticmethod + def handles(url: str) -> bool: + return YOUTUBE_URL.match(url) + + @staticmethod + def on_progress(bytes, file_size, start): + validate_file_size(file_size) diff --git a/boards/utils.py b/boards/utils.py --- a/boards/utils.py +++ b/boards/utils.py @@ -7,8 +7,11 @@ import hmac from django.core.cache import cache from django.db.models import Model +from django import forms from django.utils import timezone +from django.utils.translation import ugettext_lazy as _ +import boards from neboard import settings @@ -90,3 +93,11 @@ def get_file_hash(file) -> str: for chunk in file.chunks(): md5.update(chunk) return md5.hexdigest() + + +def validate_file_size(size: int): + max_size = boards.settings.get_int('Forms', 'MaxFileSize') + if size > max_size: + raise forms.ValidationError( + _('File must be less than %s bytes') + % str(max_size)) diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +pytube requests adjacent django-haystack