u/bodqhrohro/swineboard Files · boards/models/attachment/downloaders.py

Merge tip

neko259 - - Load All Authors

File last commit:

r1884:00dc6213 default


                r2144:1765cedc

lite

Download file

             downloaders.py
        
                    101 lines
            
             | 2.7 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / boards / models / attachment / downloaders.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        neko259
    
Download webm videos from youtube

              r1328
            
      import re

        neko259
    
Show domain next to URL if available

              r1765
            
      import requests

      from django.core.files.uploadedfile import TemporaryUploadedFile

        neko259
    
Download webm videos from youtube

              r1328
            
      from pytube import YouTube

      from boards.utils import validate_file_size

      # Container format passed to pytube's filter when picking a YouTube stream.
      YOUTUBE_VIDEO_FORMAT = 'webm'

      # Only responses with this status code are saved as files.
      HTTP_RESULT_OK = 200

      # Header names looked up on the HEAD response before downloading.
      HEADER_CONTENT_LENGTH = 'content-length'
      HEADER_CONTENT_TYPE = 'content-type'

      # Chunk size, in bytes, used when streaming a download into the temp file.
      FILE_DOWNLOAD_CHUNK_BYTES = 200000

      # Matches youtube.com watch links and youtu.be short links. The dot in
      # "youtu.be" is escaped so it matches only a literal dot, not any character.
      REGEX_YOUTUBE_URL = re.compile(
          r'https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)[-\w]+')

      # Matches magnet links (lower-case alphanumeric hash of 20-50 characters).
      REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*')

      # Content types that are kept as a plain link instead of being downloaded.
      TYPE_URL_ONLY = (
          'application/xhtml+xml',
          'text/html',
      )

        neko259
    
Download webm videos from youtube

              r1328
            
      class Downloader:
          """Generic HTTP downloader that streams a URL into a temporary file."""

          @staticmethod
          def handles(url: str) -> bool:
              """Return True for every URL: this downloader accepts anything."""
              return True

          @staticmethod
          def download(url: str, validate=True):
              """Download ``url`` into a ``TemporaryUploadedFile``.

              :param url: address of the resource to fetch.
              :param validate: when true, HTML-like content is refused and the
                  advertised Content-Length is checked before downloading.
                  Defaults to True so the method can be called with only a URL.
              :return: the temporary file holding the body, or None when the
                  content is link-only or the GET response is not 200.
              :raises: whatever ``validate_file_size`` raises when the advertised
                  or the actually received size is rejected.
              """
              # NOTE(review): verify=False disables TLS certificate verification
              # for both requests below; kept unchanged to preserve behaviour.
              response_head = requests.head(url, verify=False)

              # A server may omit Content-Type; fall back to a generic binary
              # type instead of failing with KeyError.
              raw_type = response_head.headers.get(
                  HEADER_CONTENT_TYPE, 'application/octet-stream')
              content_type = raw_type.split(';')[0].strip()

              if validate and content_type in TYPE_URL_ONLY:
                  return None

              length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
              if validate and length_header:
                  validate_file_size(int(length_header))

              # Stream the body so it goes to a temp file chunk by chunk rather
              # than being held in memory as a whole.
              response = requests.get(url, verify=False, stream=True)
              try:
                  if response.status_code != HTTP_RESULT_OK:
                      return None

                  # Dummy file name that will be replaced anyway; only the
                  # extension matters. Tolerate a type without a subtype.
                  type_parts = content_type.split('/')
                  if len(type_parts) > 1 and type_parts[1]:
                      filename = 'file.' + type_parts[1]
                  else:
                      filename = 'file'

                  tmp_file = TemporaryUploadedFile(
                      filename, content_type, 0, None, None)
                  try:
                      # Stop as soon as the received size exceeds the limit.
                      # This check runs regardless of ``validate``, as before.
                      size = 0
                      for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
                          size += len(chunk)
                          validate_file_size(size)
                          tmp_file.write(chunk)
                  except Exception:
                      # Do not leave a partially written temp file behind.
                      tmp_file.close()
                      raise
                  return tmp_file
              finally:
                  # Release the connection held open by stream=True.
                  response.close()

        neko259
    
Download webm videos from youtube

              r1328
            
      class YouTubeDownloader(Downloader):
          """Downloader for YouTube links; pytube is used only to find the URL."""

          @staticmethod
          def download(url: str, validate):
              """Download the first stream of ``url`` in YOUTUBE_VIDEO_FORMAT.

              :param url: YouTube page address.
              :param validate: forwarded unchanged to ``Downloader.download``.
              :return: whatever ``Downloader.download`` returns for the stream
                  URL, or None when no stream in the wanted format exists.
              """
              yt = YouTube()
              yt.from_url(url)

              videos = yt.filter(YOUTUBE_VIDEO_FORMAT)
              if not videos:
                  return None

              # The caller's validate flag must be passed on: the base method
              # requires it, and omitting it raised TypeError.
              return Downloader.download(videos[0].url, validate=validate)

          @staticmethod
          def handles(url: str) -> bool:
              """Return True when ``url`` matches REGEX_YOUTUBE_URL."""
              return REGEX_YOUTUBE_URL.match(url) is not None

        neko259
    
Added image aliases to upload the same images (like "fake" or "gtfo")

              r1500
            
        neko259
    
Download HTML only as a link, not as a file

              r1683
            
      class NothingDownloader(Downloader):
          """Downloader for magnet links: claims the URL but fetches nothing."""

          @staticmethod
          def handles(url: str) -> bool:
              """Return True when ``url`` matches REGEX_MAGNET."""
              # Return a real bool, as the annotation promises, rather than
              # the Match object or None.
              return REGEX_MAGNET.match(url) is not None

          @staticmethod
          def download(url: str, validate):
              """Return None: there is no file to download for such URLs."""
              return None

        neko259
    
Specific order of downloaders ensures they will work as expected

              r1811
            
      # Downloaders in the order download() tries them. Downloader.handles()
      # returns True for every URL, so it has to stay last; otherwise the
      # specialised downloaders above it would never be reached.
      DOWNLOADERS = (

          YouTubeDownloader,

          NothingDownloader,

          Downloader,

      )

        neko259
    
Do not run validation on files while performing sync

              r1833
            
      def download(url, validate=True):
          """Fetch ``url`` with the first entry of DOWNLOADERS that handles it.

          :param url: address to download.
          :param validate: passed through to the chosen downloader.
          :return: the result of the chosen downloader's ``download``.
          :raises Exception: when no downloader accepts the URL.
          """
          # The generator is lazy, so handles() is called in order and only
          # until the first downloader accepts the URL.
          chosen = next(
              (candidate for candidate in DOWNLOADERS if candidate.handles(url)),
              None,
          )
          if chosen is None:
              raise Exception('No downloader supports this URL.')
          return chosen.download(url, validate=validate)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

neko259 Download webm videos from youtube	r1328	import re

neko259 Show domain next to URL if available	r1765	import requests
		from django.core.files.uploadedfile import TemporaryUploadedFile
neko259 Download webm videos from youtube	r1328	from pytube import YouTube

		from boards.utils import validate_file_size

		YOUTUBE_VIDEO_FORMAT = 'webm'

		HTTP_RESULT_OK = 200

		HEADER_CONTENT_LENGTH = 'content-length'
		HEADER_CONTENT_TYPE = 'content-type'

neko259 Download attachments to tmp file, not into memory	r1394	FILE_DOWNLOAD_CHUNK_BYTES = 200000
neko259 Download webm videos from youtube	r1328
neko259 Show domain next to URL if available	r1765	REGEX_YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+')
		REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*')
neko259 Download webm videos from youtube	r1328
neko259 Download HTML only as a link, not as a file	r1683	TYPE_URL_ONLY = (
		'application/xhtml+xml',
		'text/html',
		)

neko259 Download webm videos from youtube	r1328
		class Downloader:
		@staticmethod
		def handles(url: str) -> bool:
neko259 Specific order of downloaders ensures they will work as expected	r1811	return True
neko259 Download webm videos from youtube	r1328
		@staticmethod
neko259 Do not run validation on files while performing sync	r1833	def download(url: str, validate):
neko259 Download webm videos from youtube	r1328	# Verify content headers
		response_head = requests.head(url, verify=False)
		content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0]
neko259 Do not run validation on files while performing sync	r1833	if validate and content_type in TYPE_URL_ONLY:
neko259 Do not download HTML as a file	r1809	return None

neko259 Download webm videos from youtube	r1328	length_header = response_head.headers.get(HEADER_CONTENT_LENGTH)
neko259 Do not run validation on files while performing sync	r1833	if validate and length_header:
neko259 Download webm videos from youtube	r1328	length = int(length_header)
		validate_file_size(length)
		# Get the actual content into memory
		response = requests.get(url, verify=False, stream=True)

neko259 Do not try to none-load youtube URLs. Try to download files only if response was 200	r1801	if response.status_code == HTTP_RESULT_OK:
		# Download file, stop if the size exceeds limit
		size = 0
neko259 Download attachments to tmp file, not into memory	r1394
neko259 Do not try to none-load youtube URLs. Try to download files only if response was 200	r1801	# Set a dummy file name that will be replaced
		# anyway, just keep the valid extension
		filename = 'file.' + content_type.split('/')[1]
neko259 Download webm videos from youtube	r1328
neko259 Do not try to none-load youtube URLs. Try to download files only if response was 200	r1801	file = TemporaryUploadedFile(filename, content_type, 0, None, None)
		for chunk in response.iter_content(FILE_DOWNLOAD_CHUNK_BYTES):
		size += len(chunk)
		validate_file_size(size)
		file.write(chunk)

neko259 Download attachments to tmp file, not into memory	r1394	return file
neko259 Download webm videos from youtube	r1328

		class YouTubeDownloader(Downloader):
		@staticmethod
neko259 Do not run validation on files while performing sync	r1833	def download(url: str, validate):
neko259 Download webm videos from youtube	r1328	yt = YouTube()
		yt.from_url(url)
		videos = yt.filter(YOUTUBE_VIDEO_FORMAT)
		if len(videos) > 0:
		video = videos[0]
neko259 Download video from youtube directly, use pytube only for getting the link	r1334	return Downloader.download(video.url)
neko259 Download webm videos from youtube	r1328
		@staticmethod
		def handles(url: str) -> bool:
neko259 Show domain next to URL if available	r1765	return REGEX_YOUTUBE_URL.match(url) is not None
neko259 Added image aliases to upload the same images (like "fake" or "gtfo")	r1500
neko259 Download HTML only as a link, not as a file	r1683
		class NothingDownloader(Downloader):
		@staticmethod
		def handles(url: str) -> bool:
neko259 NoneDownloader should handle only protocols that are 100% not processed by a simple downloader	r1810	return REGEX_MAGNET.match(url)
neko259 Download HTML only as a link, not as a file	r1683
		@staticmethod
neko259 Do not run validation on files while performing sync	r1833	def download(url: str, validate):
neko259 Download HTML only as a link, not as a file	r1683	return None
neko259 Specific order of downloaders ensures they will work as expected	r1811

		DOWNLOADERS = (
		YouTubeDownloader,
		NothingDownloader,
		Downloader,
		)


neko259 Do not run validation on files while performing sync	r1833	def download(url, validate=True):
neko259 Specific order of downloaders ensures they will work as expected	r1811	for downloader in DOWNLOADERS:
		if downloader.handles(url):
neko259 Do not run validation on files while performing sync	r1833	return downloader.download(url, validate=validate)
neko259 Specific order of downloaders ensures they will work as expected	r1811	raise Exception('No downloader supports this URL.')