u/bodqhrohro/swineboard Files · boards/models/attachment/downloaders.py

Do not download HTML as a file

neko259 - - Load All Authors

File last commit:

r1809:f75c0a41 default


                r1809:f75c0a41

default

Download file

             downloaders.py
        
                    103 lines
            
             | 2.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / boards / models / attachment / downloaders.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        neko259
    
Download webm videos from youtube

              r1328
            
      import re

        neko259
    
Show domain next to URL if available

              r1765
            
      import requests

      from django.core.files.uploadedfile import TemporaryUploadedFile

        neko259
    
Download webm videos from youtube

              r1328
            
      from pytube import YouTube

      from boards.utils import validate_file_size

      # Value passed to pytube's filter() to choose which YouTube videos are
      # considered for download.
      YOUTUBE_VIDEO_FORMAT = 'webm'

      # HTTP status code a response must carry before its content is used.
      HTTP_RESULT_OK = 200

      # Names of the response headers inspected before downloading.
      HEADER_CONTENT_LENGTH = 'content-length'

      HEADER_CONTENT_TYPE = 'content-type'

        neko259
    
Download attachments to tmp file, not into memory

              r1394
            
      # Chunk size, in bytes, handed to Response.iter_content() while a
      # download is streamed into a temporary file.
      FILE_DOWNLOAD_CHUNK_BYTES = 200000

        neko259
    
Download webm videos from youtube

              r1328
            
        neko259
    
Show domain next to URL if available

              r1765
            
      # Matches YouTube watch pages (with or without "www.") and youtu.be
      # short links. The dot in "youtu.be" is escaped so that only the real
      # short-link host matches, not any character in that position.
      REGEX_YOUTUBE_URL = re.compile(
          r'https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)[-\w]+')

      # Matches magnet links. Matching is case-insensitive because info-hashes
      # are commonly written in upper-case hex or base32; a lower-case-only
      # pattern would let such links fall through to the HTTP downloaders.
      REGEX_MAGNET = re.compile(
          r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*', re.IGNORECASE)

        neko259
    
Download webm videos from youtube

              r1328
            
        neko259
    
Download HTML only as a link, not as a file

              r1683
            
      # Content types that are never saved as a file: the generic downloader
      # returns None for them and NothingDownloader claims URLs serving them.
      TYPE_URL_ONLY = (

          'application/xhtml+xml',

          'text/html',

      )

        neko259
    
Download webm videos from youtube

              r1328
            
      class Downloader:
          """Generic HTTP downloader and base class for specific downloaders.

          Subclasses override handles() to claim URLs and download() to fetch
          them; the module-level download() falls back to this class when no
          subclass claims a URL.
          """

          @staticmethod
          def handles(url: str) -> bool:
              """Return False: the generic downloader never claims a URL."""
              return False

          @staticmethod
          def download(url: str):
              """Download the resource at *url* into a temporary uploaded file.

              Returns a TemporaryUploadedFile holding the response body, or
              None when the resource is a URL-only type (see TYPE_URL_ONLY)
              or the GET response status is not HTTP_RESULT_OK.

              Whatever validate_file_size() raises is propagated; it is
              called with the advertised content length and again with the
              running total after every chunk (presumably raising once the
              configured limit is exceeded -- see boards.utils).
              """
              # NOTE(review): verify=False disables TLS certificate checks for
              # both requests; kept as-is because callers may rely on it.

              # Verify content headers. requests does not follow redirects for
              # HEAD by default while it does for GET, so request it explicitly
              # to inspect the headers of the resource that is actually
              # downloaded below.
              response_head = requests.head(url, verify=False,
                                            allow_redirects=True)

              # The header may be absent or malformed; fall back to a generic
              # binary type instead of failing with KeyError/IndexError.
              raw_type = response_head.headers.get(HEADER_CONTENT_TYPE, '')
              content_type = raw_type.split(';')[0].strip()
              if '/' not in content_type:
                  content_type = 'application/octet-stream'

              if content_type in TYPE_URL_ONLY:
                  return None

              length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
              if length_header:
                  length = int(length_header)
                  validate_file_size(length)

              # Stream the actual content into a temporary file
              response = requests.get(url, verify=False, stream=True)
              try:
                  if response.status_code != HTTP_RESULT_OK:
                      return None

                  # Set a dummy file name that will be replaced
                  # anyway, just keep the valid extension
                  filename = 'file.' + content_type.split('/')[1]
                  file = TemporaryUploadedFile(filename, content_type, 0,
                                               None, None)

                  # Download file, stop if the size exceeds limit
                  size = 0
                  try:
                      for chunk in response.iter_content(
                              FILE_DOWNLOAD_CHUNK_BYTES):
                          size += len(chunk)
                          validate_file_size(size)
                          file.write(chunk)
                  except Exception:
                      # Do not leave a half-written temporary file open when
                      # the download is aborted.
                      file.close()
                      raise

                  return file
              finally:
                  # A streamed response keeps its connection checked out
                  # until it is closed explicitly.
                  response.close()

        neko259
    
Download webm videos from youtube

              r1328
            
        neko259
    
Download attached files to the post during sync

              r1511
            
      def download(url):
          """Download *url* with the first downloader that claims it.

          Direct subclasses of Downloader are asked in turn via handles();
          the first one answering True performs the download. When none of
          them does, the generic Downloader is used. Returns whatever the
          chosen downloader's download() returns.
          """
          specific = next(
              (candidate for candidate in Downloader.__subclasses__()
               if candidate.handles(url)),
              None,
          )

          # No specialised downloader claimed the URL: use the generic one.
          chosen = Downloader if specific is None else specific
          return chosen.download(url)

        neko259
    
Download webm videos from youtube

              r1328
            
      class YouTubeDownloader(Downloader):
          """Downloader for URLs matching REGEX_YOUTUBE_URL.

          pytube is used only to obtain the direct video URL; the file itself
          is fetched by the generic Downloader.
          """

          @staticmethod
          def handles(url: str) -> bool:
              """Return True when *url* matches REGEX_YOUTUBE_URL."""
              matched = REGEX_YOUTUBE_URL.match(url)
              return matched is not None

          @staticmethod
          def download(url: str):
              """Download the first YOUTUBE_VIDEO_FORMAT video found for *url*.

              Returns the result of Downloader.download() for that video's
              direct URL, or None when the filter yields no videos.
              """
              tube = YouTube()
              tube.from_url(url)

              candidates = tube.filter(YOUTUBE_VIDEO_FORMAT)
              if len(candidates) == 0:
                  return None

              return Downloader.download(candidates[0].url)

        neko259
    
Added image aliases to upload the same images (like "fake" or "gtfo")

              r1500
            
        neko259
    
Download HTML only as a link, not as a file

              r1683
            
      class NothingDownloader(Downloader):
          """Downloader that claims URLs which must not be stored as files.

          It claims magnet links and resources whose content type is listed
          in TYPE_URL_ONLY, and its download() always returns None.
          """

          @staticmethod
          def handles(url: str) -> bool:
              """Return True when *url* is a magnet link or a URL-only type.

              Magnet links are recognised by pattern alone. For any other URL
              a HEAD request is issued; the URL is claimed only when the
              response status is HTTP_RESULT_OK and its content type is in
              TYPE_URL_ONLY.
              """
              if REGEX_MAGNET.match(url):
                  return True

              # NOTE(review): verify=False disables TLS certificate checks;
              # kept unchanged.
              response_head = requests.head(url, verify=False)
              if response_head.status_code != HTTP_RESULT_OK:
                  return False

              # A server may omit Content-Type; treat that as "not URL-only"
              # instead of failing with KeyError.
              raw_type = response_head.headers.get(HEADER_CONTENT_TYPE, '')
              content_type = raw_type.split(';')[0].strip()
              return content_type in TYPE_URL_ONLY

          @staticmethod
          def download(url: str):
              """Return None: claimed URLs are never downloaded."""
              return None

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

neko259 Download webm videos from youtube	r1328	import re

neko259 Show domain next to URL if available	r1765	import requests
		from django.core.files.uploadedfile import TemporaryUploadedFile
neko259 Download webm videos from youtube	r1328	from pytube import YouTube

		from boards.utils import validate_file_size

		YOUTUBE_VIDEO_FORMAT = 'webm'

		HTTP_RESULT_OK = 200

		HEADER_CONTENT_LENGTH = 'content-length'
		HEADER_CONTENT_TYPE = 'content-type'

neko259 Download attachments to tmp file, not into memory	r1394	FILE_DOWNLOAD_CHUNK_BYTES = 200000
neko259 Download webm videos from youtube	r1328
neko259 Show domain next to URL if available	r1765	REGEX_YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=\|youtu.be/)[-\w]+')
		REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*')
neko259 Download webm videos from youtube	r1328
neko259 Download HTML only as a link, not as a file	r1683	TYPE_URL_ONLY = (
		'application/xhtml+xml',
		'text/html',
		)

neko259 Download webm videos from youtube	r1328
		class Downloader:
		@staticmethod
		def handles(url: str) -> bool:
		return False

		@staticmethod
		def download(url: str):
		# Verify content headers
		response_head = requests.head(url, verify=False)
		content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0]
neko259 Do not download HTML as a file	r1809	if content_type in TYPE_URL_ONLY:
		return None

neko259 Download webm videos from youtube	r1328	length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
		if length_header:
		length = int(length_header)
		validate_file_size(length)
		# Get the actual content into memory
		response = requests.get(url, verify=False, stream=True)

neko259 Do not try to none-load youtube URLs. Try to download files only if response was 200	r1801	if response.status_code == HTTP_RESULT_OK:
		# Download file, stop if the size exceeds limit
		size = 0
neko259 Download attachments to tmp file, not into memory	r1394
neko259 Do not try to none-load youtube URLs. Try to download files only if response was 200	r1801	# Set a dummy file name that will be replaced
		# anyway, just keep the valid extension
		filename = 'file.' + content_type.split('/')[1]
neko259 Download webm videos from youtube	r1328
neko259 Do not try to none-load youtube URLs. Try to download files only if response was 200	r1801	file = TemporaryUploadedFile(filename, content_type, 0, None, None)
		for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
		size += len(chunk)
		validate_file_size(size)
		file.write(chunk)

neko259 Download attachments to tmp file, not into memory	r1394	return file
neko259 Download webm videos from youtube	r1328

neko259 Download attached filed to the post during sync	r1511	def download(url):
		for downloader in Downloader.__subclasses__():
		if downloader.handles(url):
		return downloader.download(url)
		# If nobody of the specific downloaders handles this, use generic
		# one
		return Downloader.download(url)


neko259 Download webm videos from youtube	r1328	class YouTubeDownloader(Downloader):
		@staticmethod
		def download(url: str):
		yt = YouTube()
		yt.from_url(url)
		videos = yt.filter(YOUTUBE_VIDEO_FORMAT)
		if len(videos) > 0:
		video = videos[0]
neko259 Download video from youtube directly, use pytube only for getting the link	r1334	return Downloader.download(video.url)
neko259 Download webm videos from youtube	r1328
		@staticmethod
		def handles(url: str) -> bool:
neko259 Show domain next to URL if available	r1765	return REGEX_YOUTUBE_URL.match(url) is not None
neko259 Added image aliases to upload the same images (like "fake" or "gtfo")	r1500
neko259 Download HTML only as a link, not as a file	r1683
		class NothingDownloader(Downloader):
		@staticmethod
		def handles(url: str) -> bool:
neko259 Do not try to none-load youtube URLs. Try to download files only if response was 200	r1801	if REGEX_MAGNET.match(url):
neko259 Show domain next to URL if available	r1765	return True

neko259 Download HTML only as a link, not as a file	r1683	response_head = requests.head(url, verify=False)
neko259 Do not try to none-load youtube URLs. Try to download files only if response was 200	r1801	if response_head.status_code == HTTP_RESULT_OK:
		content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0]
		return content_type in TYPE_URL_ONLY
		else:
neko259 Fixed showing posts with tripcode in feed	r1804	return False
neko259 Download HTML only as a link, not as a file	r1683
		@staticmethod
		def download(url: str):
		return None