##// END OF EJS Templates
Do not download HTML as a file
Do not download HTML as a file

File last commit:

r1809:f75c0a41 default
r1809:f75c0a41 default
Show More
downloaders.py
103 lines | 2.9 KiB | text/x-python | PythonLexer
neko259
Download webm videos from youtube
r1328 import re
neko259
Show domain next to URL if available
r1765 import requests
from django.core.files.uploadedfile import TemporaryUploadedFile
neko259
Download webm videos from youtube
r1328 from pytube import YouTube
from boards.utils import validate_file_size
# Container format requested when picking a YouTube stream.
YOUTUBE_VIDEO_FORMAT = 'webm'

HTTP_RESULT_OK = 200

HEADER_CONTENT_LENGTH = 'content-length'
HEADER_CONTENT_TYPE = 'content-type'

# Chunk size, in bytes, used when streaming a response body to disk.
FILE_DOWNLOAD_CHUNK_BYTES = 200000

# Both the long (youtube.com/watch?v=) and the short (youtu.be/) forms are
# accepted. Every literal dot is escaped so that look-alike hosts such as
# "youtuXbe" are not treated as YouTube links.
REGEX_YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)[-\w]+')
REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*')

# Content types that are kept as a plain link instead of being downloaded.
TYPE_URL_ONLY = (
    'application/xhtml+xml',
    'text/html',
)
neko259
Download webm videos from youtube
r1328
class Downloader:
    """Generic downloader that stores the content of a URL in a temp file.

    Subclasses override ``handles`` to claim particular URLs and
    ``download`` to fetch them in their own way.
    """

    @staticmethod
    def handles(url: str) -> bool:
        """Tell whether this downloader claims ``url``.

        The generic downloader never claims anything.
        """
        return False

    @staticmethod
    def download(url: str):
        """Download ``url`` into a ``TemporaryUploadedFile``.

        A HEAD request is issued first to read the content type and the
        declared length. The body is then streamed in chunks of
        ``FILE_DOWNLOAD_CHUNK_BYTES`` bytes and the running size is passed
        to ``validate_file_size`` after every chunk.

        Returns the temporary file, or ``None`` when the content type is
        listed in ``TYPE_URL_ONLY`` or the GET response status is not 200.

        Anything raised by ``validate_file_size`` or by ``requests`` is
        propagated; the temporary file and the HTTP response are closed
        before that happens.
        """
        # NOTE(review): verify=False turns TLS certificate checks off. Kept
        # as in the original code; confirm that this is intentional.
        # HEAD does not follow redirects unless asked to, while GET does;
        # follow them here so both requests describe the same resource.
        response_head = requests.head(url, verify=False, allow_redirects=True)

        # Media types are case-insensitive and may carry parameters
        # (";charset=...") and stray whitespace.
        declared_type = response_head.headers.get(HEADER_CONTENT_TYPE) or ''
        content_type = declared_type.split(';')[0].strip().lower()
        if '/' not in content_type:
            # Missing or malformed header: treat the body as opaque bytes
            # instead of failing with KeyError/IndexError.
            content_type = 'application/octet-stream'

        if content_type in TYPE_URL_ONLY:
            return None

        # Reject early when the server already declares an oversized body.
        length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
        if length_header:
            validate_file_size(int(length_header))

        # Stream the body so it never has to fit into memory at once.
        response = requests.get(url, verify=False, stream=True)
        try:
            if response.status_code != HTTP_RESULT_OK:
                return None

            # Set a dummy file name that will be replaced
            # anyway, just keep the valid extension
            filename = 'file.' + content_type.split('/')[1]

            tmp_file = TemporaryUploadedFile(filename, content_type, 0, None,
                                             None)
            try:
                # Download file, stop if the size exceeds limit
                size = 0
                for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
                    size += len(chunk)
                    validate_file_size(size)
                    tmp_file.write(chunk)
            except Exception:
                # Do not leave a half-written temporary file behind.
                tmp_file.close()
                raise

            return tmp_file
        finally:
            # A streamed response keeps its connection until it is closed.
            response.close()
neko259
Download webm videos from youtube
r1328
neko259
Download attached filed to the post during sync
def download(url):
    """Download ``url`` with the first downloader that claims it.

    The direct subclasses of ``Downloader`` are asked in turn through
    ``handles``; if none of them claims the URL, the generic ``Downloader``
    is used. The chosen downloader's ``download`` result is returned.
    """
    claimants = (
        candidate
        for candidate in Downloader.__subclasses__()
        if candidate.handles(url)
    )
    # The generator is lazy, so probing stops at the first claimant.
    chosen = next(claimants, Downloader)
    return chosen.download(url)
neko259
Download webm videos from youtube
class YouTubeDownloader(Downloader):
    """Downloader for YouTube links.

    pytube is used only to resolve the direct video URL; the download
    itself is delegated to the generic ``Downloader``.
    """

    @staticmethod
    def handles(url: str) -> bool:
        """Claim URLs that match ``REGEX_YOUTUBE_URL``."""
        matched = REGEX_YOUTUBE_URL.match(url)
        return matched is not None

    @staticmethod
    def download(url: str):
        """Download the first ``YOUTUBE_VIDEO_FORMAT`` stream of the video.

        Returns whatever ``Downloader.download`` returns for the resolved
        video URL, or ``None`` when no stream of that format is available.
        """
        tube = YouTube()
        tube.from_url(url)
        candidates = tube.filter(YOUTUBE_VIDEO_FORMAT)
        if len(candidates) == 0:
            return None

        first_stream = candidates[0]
        return Downloader.download(first_stream.url)
neko259
Added image aliases to upload the same images (like "fake" or "gtfo")
r1500
neko259
Download HTML only as a link, not as a file
r1683
class NothingDownloader(Downloader):
    """Claims URLs that are kept as links only and downloads nothing."""

    @staticmethod
    def handles(url: str) -> bool:
        """Tell whether ``url`` must be kept as a link only.

        True for magnet links, and for URLs whose HEAD response has status
        200 and a content type listed in ``TYPE_URL_ONLY``. False otherwise,
        including when the server sends no content type at all.
        """
        # Magnet links are claimed before any HTTP request is attempted.
        if REGEX_MAGNET.match(url):
            return True

        # NOTE(review): verify=False turns TLS certificate checks off. Kept
        # as in the original code; confirm that this is intentional.
        # HEAD does not follow redirects unless asked to; follow them so the
        # content type of the final resource is the one inspected.
        response_head = requests.head(url, verify=False, allow_redirects=True)
        if response_head.status_code != HTTP_RESULT_OK:
            return False

        # Media types are case-insensitive and may carry parameters
        # (";charset=...") and stray whitespace; a missing header simply
        # matches nothing instead of raising KeyError.
        declared_type = response_head.headers.get(HEADER_CONTENT_TYPE) or ''
        content_type = declared_type.split(';')[0].strip().lower()
        return content_type in TYPE_URL_ONLY

    @staticmethod
    def download(url: str):
        """Download nothing; always return ``None``."""
        return None