Show More
@@ -16,7 +16,7 b' from boards.abstracts.settingsmanager im' | |||
|
16 | 16 | from boards.forms.fields import UrlFileField |
|
17 | 17 | from boards.mdx_neboard import formatters |
|
18 | 18 | from boards.models import Tag |
|
19 | from boards.models.attachment.downloaders import download | |
|
19 | from boards.models.attachment.downloaders import download, REGEX_MAGNET | |
|
20 | 20 | from boards.models.post import TITLE_MAX_LENGTH |
|
21 | 21 | from boards.utils import validate_file_size, get_file_mimetype, \ |
|
22 | 22 | FILE_EXTENSION_DELIMITER |
@@ -29,7 +29,7 b' POW_LIFE_MINUTES = 5' | |||
|
29 | 29 | |
|
30 | 30 | REGEX_TAGS = re.compile(r'^[\w\s\d]+$', re.UNICODE) |
|
31 | 31 | REGEX_USERNAMES = re.compile(r'^[\w\s\d,]+$', re.UNICODE) |
|
32 | REGEX_URL = re.compile(r'^(http|https|ftp[…] | |

32 | REGEX_URL = re.compile(r'^(http|https|ftp):\/\/', re.UNICODE) | |
|
33 | 33 | |
|
34 | 34 | VETERAN_POSTING_DELAY = 5 |
|
35 | 35 | |
@@ -332,7 +332,7 b' class PostForm(NeboardForm):' | |||
|
332 | 332 | self._update_file_extension(file) |
|
333 | 333 | except forms.ValidationError as e: |
|
334 | 334 | # Assume we will get the plain URL instead of a file and save it |
|
335 | if REGEX_URL.match(url): | |
|
335 | if REGEX_URL.match(url) or REGEX_MAGNET.match(url): | |
|
336 | 336 | logger.info('Error in forms: {}'.format(e)) |
|
337 | 337 | return url |
|
338 | 338 | else: |
@@ -12,8 +12,7 b' class Command(BaseCommand):' | |||
|
12 | 12 | print('* Domains and their usage') |
|
13 | 13 | domains = {} |
|
14 | 14 | for attachment in Attachment.objects.exclude(url=''): |
|
15 |
|
|
|
16 | domain = get_domain(full_domain) | |
|
15 | domain = get_domain(attachment.url) | |
|
17 | 16 | if domain in domains: |
|
18 | 17 | domains[domain] += 1 |
|
19 | 18 | else: |
@@ -29,7 +28,7 b' class Command(BaseCommand):' | |||
|
29 | 28 | |
|
30 | 29 | print('* File types') |
|
31 | 30 | mimetypes = Attachment.objects.filter(url='')\ |
|
32 |
|
|
|
33 |
|
|
|
31 | .values('mimetype').annotate(count=Count('id'))\ | |
|
32 | .order_by('-count') | |
|
34 | 33 | for mimetype in mimetypes: |
|
35 | 34 | print('{}: {}'.format(mimetype['mimetype'], mimetype['count'])) |
@@ -1,10 +1,8 b'' | |||
|
1 | import os | |
|
2 | 1 |
|
|
3 | 2 | |
|
4 | from django.core.files.uploadedfile import SimpleUploadedFile, \ | |
|
5 | TemporaryUploadedFile | |
|
3 | import requests | |
|
4 | from django.core.files.uploadedfile import TemporaryUploadedFile | |
|
6 | 5 | from pytube import YouTube |
|
7 | import requests | |
|
8 | 6 | |
|
9 | 7 | from boards.utils import validate_file_size |
|
10 | 8 | |
@@ -17,7 +15,8 b" HEADER_CONTENT_TYPE = 'content-type'" | |||
|
17 | 15 | |
|
18 | 16 | FILE_DOWNLOAD_CHUNK_BYTES = 200000 |
|
19 | 17 | |
|
20 | YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+') | |
|
18 | REGEX_YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+') | |
|
19 | REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*') | |
|
21 | 20 | |
|
22 | 21 | TYPE_URL_ONLY = ( |
|
23 | 22 | 'application/xhtml+xml', |
@@ -80,15 +79,18 b' class YouTubeDownloader(Downloader):' | |||
|
80 | 79 | |
|
81 | 80 | @staticmethod |
|
82 | 81 | def handles(url: str) -> bool: |
|
83 | return YOUTUBE_URL.match(url) | |
|
82 | return REGEX_YOUTUBE_URL.match(url) is not None | |
|
84 | 83 | |
|
85 | 84 | |
|
86 | 85 | class NothingDownloader(Downloader): |
|
87 | 86 | @staticmethod |
|
88 | 87 | def handles(url: str) -> bool: |
|
88 | if REGEX_MAGNET.match(url) or REGEX_YOUTUBE_URL.match(url): | |
|
89 | return True | |
|
90 | ||
|
89 | 91 | response_head = requests.head(url, verify=False) |
|
90 | 92 | content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0] |
|
91 | return content_type in TYPE_URL_ONLY | |

93 | return content_type in TYPE_URL_ONLY | |
|
92 | 94 | |
|
93 | 95 | @staticmethod |
|
94 | 96 | def download(url: str): |
@@ -168,21 +168,24 b' class UrlViewer(AbstractViewer):' | |||
|
168 | 168 | def get_view(self): |
|
169 | 169 | return '<div class="image">' \ |
|
170 | 170 | '{}' \ |
|
171 | '</div>'.format(self.get_format_view()) | |
|
171 | '<div class="image-metadata">{}</div>' \ | |
|
172 | '</div>'.format(self.get_format_view(), get_domain(self.url)) | |
|
172 | 173 | |
|
173 | 174 | def get_format_view(self): |
|
174 | 175 | protocol = self.url.split('://')[0] |
|
175 | full_domain = self.url.split('/')[2] | |
|
176 | domain = get_domain(full_domain) | |

176 | ||
|
177 | domain = get_domain(self.url) | |
|
177 | 178 | |
|
178 | 179 | if protocol in URL_PROTOCOLS: |
|
179 | 180 | url_image_name = URL_PROTOCOLS.get(protocol) |
|
180 | else: | |

181 | elif domain: | |
|
181 | 182 | filename = 'images/domains/{}.png'.format(domain) |
|
182 | 183 | if file_exists(filename): |
|
183 | 184 | url_image_name = 'domains/' + domain |
|
184 | 185 | else: |
|
185 | 186 | url_image_name = FILE_STUB_URL |
|
187 | else: | |
|
188 | url_image_name = FILE_STUB_URL | |
|
186 | 189 | |
|
187 | 190 | image_path = 'images/{}.png'.format(url_image_name) |
|
188 | 191 | image = static(image_path) |
@@ -191,3 +194,8 b' class UrlViewer(AbstractViewer):' | |||
|
191 | 194 | return '<a href="{}">' \ |
|
192 | 195 | '<img class="url-image" src="{}" width="{}" height="{}"/>' \ |
|
193 | 196 | '</a>'.format(self.url, image, w, h) |
|
197 | ||
|
198 | ||
|
199 | def _get_protocol(self): | |
|
200 | pass | |
|
201 |
@@ -153,21 +153,27 b' def get_domain(url: str) -> str:' | |||
|
153 | 153 | """ |
|
154 | 154 | Gets domain from an URL with random number of domain levels. |
|
155 | 155 | """ |
|
156 |
|
|
|
157 |
if len( |
|
|
158 | return url | |
|
159 | ||
|
160 | top = levels[-1] | |
|
161 | second = levels[-2] | |
|
156 | domain_parts = url.split('/') | |
|
157 | if len(domain_parts) >= 2: | |
|
158 | full_domain = domain_parts[2] | |
|
159 | else: | |
|
160 | full_domain = '' | |
|
162 | 161 | |
|
163 | has_third_level = len(levels) > 2 | |
|
164 | if has_third_level: | |
|
165 | third = levels[-3] | |
|
162 | result = full_domain | |
|
163 | if full_domain: | |
|
164 | levels = full_domain.split('.') | |
|
165 | if len(levels) >= 2: | |
|
166 | top = levels[-1] | |
|
167 | second = levels[-2] | |
|
166 | 168 | |
|
167 | if has_third_level and ('{}.{}'.format(second, top) in KNOWN_DOMAINS): | |
|
168 | result = '{}.{}.{}'.format(third, second, top) | |
|
169 | else: | |
|
170 | result = '{}.{}'.format(second, top) | |
|
169 | has_third_level = len(levels) > 2 | |
|
170 | if has_third_level: | |
|
171 | third = levels[-3] | |
|
172 | ||
|
173 | if has_third_level and ('{}.{}'.format(second, top) in KNOWN_DOMAINS): | |
|
174 | result = '{}.{}.{}'.format(third, second, top) | |
|
175 | else: | |
|
176 | result = '{}.{}'.format(second, top) | |
|
171 | 177 | |
|
172 | 178 | return result |
|
173 | 179 |
General Comments 0
You need to be logged in to leave comments.
Login now