##// END OF EJS Templates
Show domain next to URL if available
Show domain next to URL if available

File last commit:

r1765:7a6a61e1 default
r1765:7a6a61e1 default
Show More
downloaders.py
97 lines | 2.8 KiB | text/x-python | PythonLexer
neko259
Download webm videos from youtube
r1328 import re
neko259
Show domain next to URL if available
r1765 import requests
from django.core.files.uploadedfile import TemporaryUploadedFile
neko259
Download webm videos from youtube
r1328 from pytube import YouTube
from boards.utils import validate_file_size
# Format requested when selecting a YouTube video stream.
YOUTUBE_VIDEO_FORMAT = 'webm'

# HTTP status code that marks a successful download.
HTTP_RESULT_OK = 200

# Response header names inspected before/while downloading.
HEADER_CONTENT_LENGTH = 'content-length'
HEADER_CONTENT_TYPE = 'content-type'

# Size of each chunk read from a streamed response, in bytes.
FILE_DOWNLOAD_CHUNK_BYTES = 200000

# Matches YouTube watch links and youtu.be short links. The dot in
# "youtu.be" is escaped so that only a literal dot is accepted there.
REGEX_YOUTUBE_URL = re.compile(
    r'https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)[-\w]+')

# Matches magnet links carrying an "xt=urn:" identifier.
REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*')

# Content types that are kept as a link only and never downloaded as a file.
TYPE_URL_ONLY = (
    'application/xhtml+xml',
    'text/html',
)
neko259
Download webm videos from youtube
r1328
class Downloader:
    """Generic downloader that streams a URL into a temporary uploaded file.

    Specific downloaders subclass this and override ``handles`` and
    ``download``.
    """

    @staticmethod
    def handles(url: str) -> bool:
        """Tell whether this downloader is responsible for ``url``.

        The generic downloader never claims a URL.
        """
        return False

    @staticmethod
    def download(url: str):
        """Download ``url`` into a ``TemporaryUploadedFile``.

        The headers are checked first with a HEAD request, then the body is
        streamed in chunks of ``FILE_DOWNLOAD_CHUNK_BYTES`` while the running
        size is passed to ``validate_file_size``.

        Returns the temporary file when the response status is
        ``HTTP_RESULT_OK``, otherwise ``None``. Whatever ``validate_file_size``
        raises is propagated (it is defined in ``boards.utils``, presumably
        raising when the size limit is exceeded -- confirm there).
        """
        # NOTE(review): verify=False disables TLS certificate verification
        # for both requests below; kept to preserve existing behavior.
        response_head = requests.head(url, verify=False)

        # A missing Content-Type header falls back to a generic binary type
        # instead of failing with KeyError.
        raw_type = (response_head.headers.get(HEADER_CONTENT_TYPE)
                    or 'application/octet-stream')
        content_type = raw_type.split(';')[0].strip()

        # Reject early when the server already announces an oversized body.
        length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
        if length_header:
            validate_file_size(int(length_header))

        response = requests.get(url, verify=False, stream=True)
        try:
            # Do not bother reading the body of an unsuccessful response.
            if response.status_code != HTTP_RESULT_OK:
                return None

            # Set a dummy file name that will be replaced anyway, just keep
            # a valid extension ('bin' when the type has no subtype part).
            type_parts = content_type.split('/')
            extension = type_parts[1] if len(type_parts) > 1 else 'bin'
            filename = 'file.' + extension

            file = TemporaryUploadedFile(filename, content_type, 0, None, None)
            try:
                # Download the file, stop if the size exceeds the limit.
                size = 0
                for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
                    size += len(chunk)
                    validate_file_size(size)
                    file.write(chunk)
            except Exception:
                # Do not leave the temporary file open on a failed download.
                file.close()
                raise
            return file
        finally:
            # Always release the streamed connection.
            response.close()
neko259
Download webm videos from youtube
r1328
neko259
Download attached files to the post during sync
def download(url):
    """Download ``url`` with the first specific downloader that handles it.

    Direct subclasses of ``Downloader`` are asked in turn through their
    ``handles`` method; the result of the chosen downloader's ``download``
    is returned.
    """
    specific = next(
        (candidate for candidate in Downloader.__subclasses__()
         if candidate.handles(url)),
        None,
    )
    # If none of the specific downloaders handles this, use the generic one
    chosen = Downloader if specific is None else specific
    return chosen.download(url)
neko259
Download webm videos from youtube
class YouTubeDownloader(Downloader):
    """Downloader for YouTube links.

    pytube is used only to resolve the video URL; the file itself is
    fetched through ``Downloader.download``.
    """

    @staticmethod
    def download(url: str):
        """Download the first video offered in ``YOUTUBE_VIDEO_FORMAT``.

        Returns the result of ``Downloader.download`` for the resolved video
        URL, or ``None`` when the filter yields no video in that format.
        """
        youtube = YouTube()
        youtube.from_url(url)
        candidates = youtube.filter(YOUTUBE_VIDEO_FORMAT)
        if not candidates:
            return None
        return Downloader.download(candidates[0].url)

    @staticmethod
    def handles(url: str) -> bool:
        """Tell whether ``url`` matches ``REGEX_YOUTUBE_URL``."""
        matched = REGEX_YOUTUBE_URL.match(url)
        return matched is not None
neko259
Added image aliases to upload the same images (like "fake" or "gtfo")
r1500
neko259
Download HTML only as a link, not as a file
r1683
class NothingDownloader(Downloader):
    """Downloader for URLs that are kept as links only; nothing is fetched."""

    @staticmethod
    def handles(url: str) -> bool:
        """Tell whether ``url`` should be stored as a link instead of a file.

        Magnet and YouTube links are recognized by pattern alone. Any other
        URL is probed with a HEAD request and is handled here when its
        content type is one of ``TYPE_URL_ONLY``.
        """
        if REGEX_MAGNET.match(url) or REGEX_YOUTUBE_URL.match(url):
            return True

        # NOTE(review): verify=False disables TLS certificate verification;
        # kept to preserve existing behavior.
        response_head = requests.head(url, verify=False)

        # A missing Content-Type header means "not a link-only type" rather
        # than a KeyError. The media type is compared case-insensitively and
        # without parameters such as "; charset=...".
        raw_type = response_head.headers.get(HEADER_CONTENT_TYPE) or ''
        content_type = raw_type.split(';')[0].strip().lower()
        return content_type in TYPE_URL_ONLY

    @staticmethod
    def download(url: str):
        """Download nothing; always returns ``None``."""
        return None