diff --git a/boards/forms/__init__.py b/boards/forms/__init__.py --- a/boards/forms/__init__.py +++ b/boards/forms/__init__.py @@ -16,7 +16,7 @@ from boards.abstracts.settingsmanager im from boards.forms.fields import UrlFileField from boards.mdx_neboard import formatters from boards.models import Tag -from boards.models.attachment.downloaders import download +from boards.models.attachment.downloaders import download, REGEX_MAGNET from boards.models.post import TITLE_MAX_LENGTH from boards.utils import validate_file_size, get_file_mimetype, \ FILE_EXTENSION_DELIMITER @@ -29,7 +29,7 @@ POW_LIFE_MINUTES = 5 REGEX_TAGS = re.compile(r'^[\w\s\d]+$', re.UNICODE) REGEX_USERNAMES = re.compile(r'^[\w\s\d,]+$', re.UNICODE) -REGEX_URL = re.compile(r'^(http|https|ftp|magnet):\/\/', re.UNICODE) +REGEX_URL = re.compile(r'^(http|https|ftp):\/\/', re.UNICODE) VETERAN_POSTING_DELAY = 5 @@ -332,7 +332,7 @@ class PostForm(NeboardForm): self._update_file_extension(file) except forms.ValidationError as e: # Assume we will get the plain URL instead of a file and save it - if REGEX_URL.match(url): + if REGEX_URL.match(url) or REGEX_MAGNET.match(url): logger.info('Error in forms: {}'.format(e)) return url else: diff --git a/boards/management/commands/statistics.py b/boards/management/commands/statistics.py --- a/boards/management/commands/statistics.py +++ b/boards/management/commands/statistics.py @@ -12,8 +12,7 @@ class Command(BaseCommand): print('* Domains and their usage') domains = {} for attachment in Attachment.objects.exclude(url=''): - full_domain = attachment.url.split('/')[2] - domain = get_domain(full_domain) + domain = get_domain(attachment.url) if domain in domains: domains[domain] += 1 else: @@ -29,7 +28,7 @@ class Command(BaseCommand): print('* File types') mimetypes = Attachment.objects.filter(url='')\ - .values('mimetype').annotate(count=Count('id'))\ - .order_by('-count') + .values('mimetype').annotate(count=Count('id'))\ + .order_by('-count') for mimetype in 
mimetypes: print('{}: {}'.format(mimetype['mimetype'], mimetype['count'])) diff --git a/boards/models/attachment/downloaders.py b/boards/models/attachment/downloaders.py --- a/boards/models/attachment/downloaders.py +++ b/boards/models/attachment/downloaders.py @@ -1,10 +1,8 @@ -import os import re -from django.core.files.uploadedfile import SimpleUploadedFile, \ - TemporaryUploadedFile +import requests +from django.core.files.uploadedfile import TemporaryUploadedFile from pytube import YouTube -import requests from boards.utils import validate_file_size @@ -17,7 +15,8 @@ HEADER_CONTENT_TYPE = 'content-type' FILE_DOWNLOAD_CHUNK_BYTES = 200000 -YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+') +REGEX_YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+') +REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*') TYPE_URL_ONLY = ( 'application/xhtml+xml', @@ -80,15 +79,18 @@ class YouTubeDownloader(Downloader): @staticmethod def handles(url: str) -> bool: - return YOUTUBE_URL.match(url) + return REGEX_YOUTUBE_URL.match(url) is not None class NothingDownloader(Downloader): @staticmethod def handles(url: str) -> bool: + if REGEX_MAGNET.match(url) or REGEX_YOUTUBE_URL.match(url): + return True + response_head = requests.head(url, verify=False) content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0] - return content_type in TYPE_URL_ONLY and not YOUTUBE_URL.match(url) + return content_type in TYPE_URL_ONLY @staticmethod def download(url: str): diff --git a/boards/models/attachment/viewers.py b/boards/models/attachment/viewers.py --- a/boards/models/attachment/viewers.py +++ b/boards/models/attachment/viewers.py @@ -168,21 +168,24 @@ class UrlViewer(AbstractViewer): def get_view(self): return '
' \ '{}' \ - '
'.format(self.get_format_view()) + '
{}
' \ + ''.format(self.get_format_view(), get_domain(self.url)) def get_format_view(self): protocol = self.url.split('://')[0] - full_domain = self.url.split('/')[2] - domain = get_domain(full_domain) + + domain = get_domain(self.url) if protocol in URL_PROTOCOLS: url_image_name = URL_PROTOCOLS.get(protocol) - else: + elif domain: filename = 'images/domains/{}.png'.format(domain) if file_exists(filename): url_image_name = 'domains/' + domain else: url_image_name = FILE_STUB_URL + else: + url_image_name = FILE_STUB_URL image_path = 'images/{}.png'.format(url_image_name) image = static(image_path) @@ -191,3 +194,8 @@ class UrlViewer(AbstractViewer): return '' \ '' \ ''.format(self.url, image, w, h) + + + def _get_protocol(self): + pass + diff --git a/boards/utils.py b/boards/utils.py --- a/boards/utils.py +++ b/boards/utils.py @@ -153,21 +153,27 @@ def get_domain(url: str) -> str: """ Gets domain from an URL with random number of domain levels. """ - levels = url.split('.') - if len(levels) < 2: - return url - - top = levels[-1] - second = levels[-2] + domain_parts = url.split('/') + if len(domain_parts) >= 3: + full_domain = domain_parts[2] + else: + full_domain = '' - has_third_level = len(levels) > 2 - if has_third_level: - third = levels[-3] + result = full_domain + if full_domain: + levels = full_domain.split('.') + if len(levels) >= 2: + top = levels[-1] + second = levels[-2] - if has_third_level and ('{}.{}'.format(second, top) in KNOWN_DOMAINS): - result = '{}.{}.{}'.format(third, second, top) - else: - result = '{}.{}'.format(second, top) + has_third_level = len(levels) > 2 + if has_third_level: + third = levels[-3] + + if has_third_level and ('{}.{}'.format(second, top) in KNOWN_DOMAINS): + result = '{}.{}.{}'.format(third, second, top) + else: + result = '{}.{}'.format(second, top) return result