##// END OF EJS Templates
Show domain next to URL if available
neko259 -
r1765:7a6a61e1 default
parent child Browse files
Show More
@@ -16,7 +16,7 b' from boards.abstracts.settingsmanager im'
16 16 from boards.forms.fields import UrlFileField
17 17 from boards.mdx_neboard import formatters
18 18 from boards.models import Tag
19 from boards.models.attachment.downloaders import download
19 from boards.models.attachment.downloaders import download, REGEX_MAGNET
20 20 from boards.models.post import TITLE_MAX_LENGTH
21 21 from boards.utils import validate_file_size, get_file_mimetype, \
22 22 FILE_EXTENSION_DELIMITER
@@ -29,7 +29,7 b' POW_LIFE_MINUTES = 5'
29 29
30 30 REGEX_TAGS = re.compile(r'^[\w\s\d]+$', re.UNICODE)
31 31 REGEX_USERNAMES = re.compile(r'^[\w\s\d,]+$', re.UNICODE)
32 REGEX_URL = re.compile(r'^(http|https|ftp|magnet):\/\/', re.UNICODE)
32 REGEX_URL = re.compile(r'^(http|https|ftp):\/\/', re.UNICODE)
33 33
34 34 VETERAN_POSTING_DELAY = 5
35 35
@@ -332,7 +332,7 b' class PostForm(NeboardForm):'
332 332 self._update_file_extension(file)
333 333 except forms.ValidationError as e:
334 334 # Assume we will get the plain URL instead of a file and save it
335 if REGEX_URL.match(url):
335 if REGEX_URL.match(url) or REGEX_MAGNET.match(url):
336 336 logger.info('Error in forms: {}'.format(e))
337 337 return url
338 338 else:
@@ -12,8 +12,7 b' class Command(BaseCommand):'
12 12 print('* Domains and their usage')
13 13 domains = {}
14 14 for attachment in Attachment.objects.exclude(url=''):
15 full_domain = attachment.url.split('/')[2]
16 domain = get_domain(full_domain)
15 domain = get_domain(attachment.url)
17 16 if domain in domains:
18 17 domains[domain] += 1
19 18 else:
@@ -29,7 +28,7 b' class Command(BaseCommand):'
29 28
30 29 print('* File types')
31 30 mimetypes = Attachment.objects.filter(url='')\
32 .values('mimetype').annotate(count=Count('id'))\
33 .order_by('-count')
31 .values('mimetype').annotate(count=Count('id'))\
32 .order_by('-count')
34 33 for mimetype in mimetypes:
35 34 print('{}: {}'.format(mimetype['mimetype'], mimetype['count']))
@@ -1,10 +1,8 b''
1 import os
2 1 import re
3 2
4 from django.core.files.uploadedfile import SimpleUploadedFile, \
5 TemporaryUploadedFile
3 import requests
4 from django.core.files.uploadedfile import TemporaryUploadedFile
6 5 from pytube import YouTube
7 import requests
8 6
9 7 from boards.utils import validate_file_size
10 8
@@ -17,7 +15,8 b" HEADER_CONTENT_TYPE = 'content-type'"
17 15
18 16 FILE_DOWNLOAD_CHUNK_BYTES = 200000
19 17
20 YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+')
18 REGEX_YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+')
19 REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*')
21 20
22 21 TYPE_URL_ONLY = (
23 22 'application/xhtml+xml',
@@ -80,15 +79,18 b' class YouTubeDownloader(Downloader):'
80 79
81 80 @staticmethod
82 81 def handles(url: str) -> bool:
83 return YOUTUBE_URL.match(url)
82 return REGEX_YOUTUBE_URL.match(url) is not None
84 83
85 84
86 85 class NothingDownloader(Downloader):
87 86 @staticmethod
88 87 def handles(url: str) -> bool:
88 if REGEX_MAGNET.match(url) or REGEX_YOUTUBE_URL.match(url):
89 return True
90
89 91 response_head = requests.head(url, verify=False)
90 92 content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0]
91 return content_type in TYPE_URL_ONLY and not YOUTUBE_URL.match(url)
93 return content_type in TYPE_URL_ONLY
92 94
93 95 @staticmethod
94 96 def download(url: str):
@@ -168,21 +168,24 b' class UrlViewer(AbstractViewer):'
168 168 def get_view(self):
169 169 return '<div class="image">' \
170 170 '{}' \
171 '</div>'.format(self.get_format_view())
171 '<div class="image-metadata">{}</div>' \
172 '</div>'.format(self.get_format_view(), get_domain(self.url))
172 173
173 174 def get_format_view(self):
174 175 protocol = self.url.split('://')[0]
175 full_domain = self.url.split('/')[2]
176 domain = get_domain(full_domain)
176
177 domain = get_domain(self.url)
177 178
178 179 if protocol in URL_PROTOCOLS:
179 180 url_image_name = URL_PROTOCOLS.get(protocol)
180 else:
181 elif domain:
181 182 filename = 'images/domains/{}.png'.format(domain)
182 183 if file_exists(filename):
183 184 url_image_name = 'domains/' + domain
184 185 else:
185 186 url_image_name = FILE_STUB_URL
187 else:
188 url_image_name = FILE_STUB_URL
186 189
187 190 image_path = 'images/{}.png'.format(url_image_name)
188 191 image = static(image_path)
@@ -191,3 +194,8 b' class UrlViewer(AbstractViewer):'
191 194 return '<a href="{}">' \
192 195 '<img class="url-image" src="{}" width="{}" height="{}"/>' \
193 196 '</a>'.format(self.url, image, w, h)
197
198
199 def _get_protocol(self):
200 pass
201
@@ -153,21 +153,27 b' def get_domain(url: str) -> str:'
153 153 """
154 154 Gets domain from an URL with random number of domain levels.
155 155 """
156 levels = url.split('.')
157 if len(levels) < 2:
158 return url
159
160 top = levels[-1]
161 second = levels[-2]
156 domain_parts = url.split('/')
157 if len(domain_parts) >= 2:
158 full_domain = domain_parts[2]
159 else:
160 full_domain = ''
162 161
163 has_third_level = len(levels) > 2
164 if has_third_level:
165 third = levels[-3]
162 result = full_domain
163 if full_domain:
164 levels = full_domain.split('.')
165 if len(levels) >= 2:
166 top = levels[-1]
167 second = levels[-2]
166 168
167 if has_third_level and ('{}.{}'.format(second, top) in KNOWN_DOMAINS):
168 result = '{}.{}.{}'.format(third, second, top)
169 else:
170 result = '{}.{}'.format(second, top)
169 has_third_level = len(levels) > 2
170 if has_third_level:
171 third = levels[-3]
172
173 if has_third_level and ('{}.{}'.format(second, top) in KNOWN_DOMAINS):
174 result = '{}.{}.{}'.format(third, second, top)
175 else:
176 result = '{}.{}'.format(second, top)
171 177
172 178 return result
173 179
General Comments 0
You need to be logged in to leave comments. Login now