##// END OF EJS Templates
Show domain next to URL if available
neko259 -
r1765:7a6a61e1 default
parent child Browse files
Show More
@@ -16,7 +16,7 b' from boards.abstracts.settingsmanager im'
16 from boards.forms.fields import UrlFileField
16 from boards.forms.fields import UrlFileField
17 from boards.mdx_neboard import formatters
17 from boards.mdx_neboard import formatters
18 from boards.models import Tag
18 from boards.models import Tag
19 from boards.models.attachment.downloaders import download
19 from boards.models.attachment.downloaders import download, REGEX_MAGNET
20 from boards.models.post import TITLE_MAX_LENGTH
20 from boards.models.post import TITLE_MAX_LENGTH
21 from boards.utils import validate_file_size, get_file_mimetype, \
21 from boards.utils import validate_file_size, get_file_mimetype, \
22 FILE_EXTENSION_DELIMITER
22 FILE_EXTENSION_DELIMITER
@@ -29,7 +29,7 b' POW_LIFE_MINUTES = 5'
29
29
30 REGEX_TAGS = re.compile(r'^[\w\s\d]+$', re.UNICODE)
30 REGEX_TAGS = re.compile(r'^[\w\s\d]+$', re.UNICODE)
31 REGEX_USERNAMES = re.compile(r'^[\w\s\d,]+$', re.UNICODE)
31 REGEX_USERNAMES = re.compile(r'^[\w\s\d,]+$', re.UNICODE)
32 REGEX_URL = re.compile(r'^(http|https|ftp|magnet):\/\/', re.UNICODE)
32 REGEX_URL = re.compile(r'^(http|https|ftp):\/\/', re.UNICODE)
33
33
34 VETERAN_POSTING_DELAY = 5
34 VETERAN_POSTING_DELAY = 5
35
35
@@ -332,7 +332,7 b' class PostForm(NeboardForm):'
332 self._update_file_extension(file)
332 self._update_file_extension(file)
333 except forms.ValidationError as e:
333 except forms.ValidationError as e:
334 # Assume we will get the plain URL instead of a file and save it
334 # Assume we will get the plain URL instead of a file and save it
335 if REGEX_URL.match(url):
335 if REGEX_URL.match(url) or REGEX_MAGNET.match(url):
336 logger.info('Error in forms: {}'.format(e))
336 logger.info('Error in forms: {}'.format(e))
337 return url
337 return url
338 else:
338 else:
@@ -12,8 +12,7 b' class Command(BaseCommand):'
12 print('* Domains and their usage')
12 print('* Domains and their usage')
13 domains = {}
13 domains = {}
14 for attachment in Attachment.objects.exclude(url=''):
14 for attachment in Attachment.objects.exclude(url=''):
15 full_domain = attachment.url.split('/')[2]
15 domain = get_domain(attachment.url)
16 domain = get_domain(full_domain)
17 if domain in domains:
16 if domain in domains:
18 domains[domain] += 1
17 domains[domain] += 1
19 else:
18 else:
@@ -29,7 +28,7 b' class Command(BaseCommand):'
29
28
30 print('* File types')
29 print('* File types')
31 mimetypes = Attachment.objects.filter(url='')\
30 mimetypes = Attachment.objects.filter(url='')\
32 .values('mimetype').annotate(count=Count('id'))\
31 .values('mimetype').annotate(count=Count('id'))\
33 .order_by('-count')
32 .order_by('-count')
34 for mimetype in mimetypes:
33 for mimetype in mimetypes:
35 print('{}: {}'.format(mimetype['mimetype'], mimetype['count']))
34 print('{}: {}'.format(mimetype['mimetype'], mimetype['count']))
@@ -1,10 +1,8 b''
1 import os
2 import re
1 import re
3
2
4 from django.core.files.uploadedfile import SimpleUploadedFile, \
3 import requests
5 TemporaryUploadedFile
4 from django.core.files.uploadedfile import TemporaryUploadedFile
6 from pytube import YouTube
5 from pytube import YouTube
7 import requests
8
6
9 from boards.utils import validate_file_size
7 from boards.utils import validate_file_size
10
8
@@ -17,7 +15,8 b" HEADER_CONTENT_TYPE = 'content-type'"
17
15
18 FILE_DOWNLOAD_CHUNK_BYTES = 200000
16 FILE_DOWNLOAD_CHUNK_BYTES = 200000
19
17
20 YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+')
18 REGEX_YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+')
19 REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*')
21
20
22 TYPE_URL_ONLY = (
21 TYPE_URL_ONLY = (
23 'application/xhtml+xml',
22 'application/xhtml+xml',
@@ -80,15 +79,18 b' class YouTubeDownloader(Downloader):'
80
79
81 @staticmethod
80 @staticmethod
82 def handles(url: str) -> bool:
81 def handles(url: str) -> bool:
83 return YOUTUBE_URL.match(url)
82 return REGEX_YOUTUBE_URL.match(url) is not None
84
83
85
84
86 class NothingDownloader(Downloader):
85 class NothingDownloader(Downloader):
87 @staticmethod
86 @staticmethod
88 def handles(url: str) -> bool:
87 def handles(url: str) -> bool:
88 if REGEX_MAGNET.match(url) or REGEX_YOUTUBE_URL.match(url):
89 return True
90
89 response_head = requests.head(url, verify=False)
91 response_head = requests.head(url, verify=False)
90 content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0]
92 content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0]
91 return content_type in TYPE_URL_ONLY and not YOUTUBE_URL.match(url)
93 return content_type in TYPE_URL_ONLY
92
94
93 @staticmethod
95 @staticmethod
94 def download(url: str):
96 def download(url: str):
@@ -168,21 +168,24 b' class UrlViewer(AbstractViewer):'
168 def get_view(self):
168 def get_view(self):
169 return '<div class="image">' \
169 return '<div class="image">' \
170 '{}' \
170 '{}' \
171 '</div>'.format(self.get_format_view())
171 '<div class="image-metadata">{}</div>' \
172 '</div>'.format(self.get_format_view(), get_domain(self.url))
172
173
173 def get_format_view(self):
174 def get_format_view(self):
174 protocol = self.url.split('://')[0]
175 protocol = self.url.split('://')[0]
175 full_domain = self.url.split('/')[2]
176
176 domain = get_domain(full_domain)
177 domain = get_domain(self.url)
177
178
178 if protocol in URL_PROTOCOLS:
179 if protocol in URL_PROTOCOLS:
179 url_image_name = URL_PROTOCOLS.get(protocol)
180 url_image_name = URL_PROTOCOLS.get(protocol)
180 else:
181 elif domain:
181 filename = 'images/domains/{}.png'.format(domain)
182 filename = 'images/domains/{}.png'.format(domain)
182 if file_exists(filename):
183 if file_exists(filename):
183 url_image_name = 'domains/' + domain
184 url_image_name = 'domains/' + domain
184 else:
185 else:
185 url_image_name = FILE_STUB_URL
186 url_image_name = FILE_STUB_URL
187 else:
188 url_image_name = FILE_STUB_URL
186
189
187 image_path = 'images/{}.png'.format(url_image_name)
190 image_path = 'images/{}.png'.format(url_image_name)
188 image = static(image_path)
191 image = static(image_path)
@@ -191,3 +194,8 b' class UrlViewer(AbstractViewer):'
191 return '<a href="{}">' \
194 return '<a href="{}">' \
192 '<img class="url-image" src="{}" width="{}" height="{}"/>' \
195 '<img class="url-image" src="{}" width="{}" height="{}"/>' \
193 '</a>'.format(self.url, image, w, h)
196 '</a>'.format(self.url, image, w, h)
197
198
199 def _get_protocol(self):
200 pass
201
@@ -153,21 +153,27 b' def get_domain(url: str) -> str:'
153 """
153 """
154 Gets domain from an URL with random number of domain levels.
154 Gets domain from an URL with random number of domain levels.
155 """
155 """
156 levels = url.split('.')
156 domain_parts = url.split('/')
157 if len(levels) < 2:
157 if len(domain_parts) >= 2:
158 return url
158 full_domain = domain_parts[2]
159
159 else:
160 top = levels[-1]
160 full_domain = ''
161 second = levels[-2]
162
161
163 has_third_level = len(levels) > 2
162 result = full_domain
164 if has_third_level:
163 if full_domain:
165 third = levels[-3]
164 levels = full_domain.split('.')
165 if len(levels) >= 2:
166 top = levels[-1]
167 second = levels[-2]
166
168
167 if has_third_level and ('{}.{}'.format(second, top) in KNOWN_DOMAINS):
169 has_third_level = len(levels) > 2
168 result = '{}.{}.{}'.format(third, second, top)
170 if has_third_level:
169 else:
171 third = levels[-3]
170 result = '{}.{}'.format(second, top)
172
173 if has_third_level and ('{}.{}'.format(second, top) in KNOWN_DOMAINS):
174 result = '{}.{}.{}'.format(third, second, top)
175 else:
176 result = '{}.{}'.format(second, top)
171
177
172 return result
178 return result
173
179
General Comments 0
You need to be logged in to leave comments. Login now