Show More
@@ -16,7 +16,7 b' from boards.abstracts.settingsmanager im' | |||||
16 | from boards.forms.fields import UrlFileField |
|
16 | from boards.forms.fields import UrlFileField | |
17 | from boards.mdx_neboard import formatters |
|
17 | from boards.mdx_neboard import formatters | |
18 | from boards.models import Tag |
|
18 | from boards.models import Tag | |
19 | from boards.models.attachment.downloaders import download |
|
19 | from boards.models.attachment.downloaders import download, REGEX_MAGNET | |
20 | from boards.models.post import TITLE_MAX_LENGTH |
|
20 | from boards.models.post import TITLE_MAX_LENGTH | |
21 | from boards.utils import validate_file_size, get_file_mimetype, \ |
|
21 | from boards.utils import validate_file_size, get_file_mimetype, \ | |
22 | FILE_EXTENSION_DELIMITER |
|
22 | FILE_EXTENSION_DELIMITER | |
@@ -29,7 +29,7 b' POW_LIFE_MINUTES = 5' | |||||
29 |
|
29 | |||
30 | REGEX_TAGS = re.compile(r'^[\w\s\d]+$', re.UNICODE) |
|
30 | REGEX_TAGS = re.compile(r'^[\w\s\d]+$', re.UNICODE) | |
31 | REGEX_USERNAMES = re.compile(r'^[\w\s\d,]+$', re.UNICODE) |
|
31 | REGEX_USERNAMES = re.compile(r'^[\w\s\d,]+$', re.UNICODE) | |
32 |
REGEX_URL = re.compile(r'^(http|https|ftp |
|
32 | REGEX_URL = re.compile(r'^(http|https|ftp):\/\/', re.UNICODE) | |
33 |
|
33 | |||
34 | VETERAN_POSTING_DELAY = 5 |
|
34 | VETERAN_POSTING_DELAY = 5 | |
35 |
|
35 | |||
@@ -332,7 +332,7 b' class PostForm(NeboardForm):' | |||||
332 | self._update_file_extension(file) |
|
332 | self._update_file_extension(file) | |
333 | except forms.ValidationError as e: |
|
333 | except forms.ValidationError as e: | |
334 | # Assume we will get the plain URL instead of a file and save it |
|
334 | # Assume we will get the plain URL instead of a file and save it | |
335 | if REGEX_URL.match(url): |
|
335 | if REGEX_URL.match(url) or REGEX_MAGNET.match(url): | |
336 | logger.info('Error in forms: {}'.format(e)) |
|
336 | logger.info('Error in forms: {}'.format(e)) | |
337 | return url |
|
337 | return url | |
338 | else: |
|
338 | else: |
@@ -12,8 +12,7 b' class Command(BaseCommand):' | |||||
12 | print('* Domains and their usage') |
|
12 | print('* Domains and their usage') | |
13 | domains = {} |
|
13 | domains = {} | |
14 | for attachment in Attachment.objects.exclude(url=''): |
|
14 | for attachment in Attachment.objects.exclude(url=''): | |
15 |
|
|
15 | domain = get_domain(attachment.url) | |
16 | domain = get_domain(full_domain) |
|
|||
17 | if domain in domains: |
|
16 | if domain in domains: | |
18 | domains[domain] += 1 |
|
17 | domains[domain] += 1 | |
19 | else: |
|
18 | else: | |
@@ -29,7 +28,7 b' class Command(BaseCommand):' | |||||
29 |
|
28 | |||
30 | print('* File types') |
|
29 | print('* File types') | |
31 | mimetypes = Attachment.objects.filter(url='')\ |
|
30 | mimetypes = Attachment.objects.filter(url='')\ | |
32 |
|
|
31 | .values('mimetype').annotate(count=Count('id'))\ | |
33 |
|
|
32 | .order_by('-count') | |
34 | for mimetype in mimetypes: |
|
33 | for mimetype in mimetypes: | |
35 | print('{}: {}'.format(mimetype['mimetype'], mimetype['count'])) |
|
34 | print('{}: {}'.format(mimetype['mimetype'], mimetype['count'])) |
@@ -1,10 +1,8 b'' | |||||
1 | import os |
|
|||
2 |
|
|
1 | import re | |
3 |
|
2 | |||
4 | from django.core.files.uploadedfile import SimpleUploadedFile, \ |
|
3 | import requests | |
5 | TemporaryUploadedFile |
|
4 | from django.core.files.uploadedfile import TemporaryUploadedFile | |
6 | from pytube import YouTube |
|
5 | from pytube import YouTube | |
7 | import requests |
|
|||
8 |
|
6 | |||
9 | from boards.utils import validate_file_size |
|
7 | from boards.utils import validate_file_size | |
10 |
|
8 | |||
@@ -17,7 +15,8 b" HEADER_CONTENT_TYPE = 'content-type'" | |||||
17 |
|
15 | |||
18 | FILE_DOWNLOAD_CHUNK_BYTES = 200000 |
|
16 | FILE_DOWNLOAD_CHUNK_BYTES = 200000 | |
19 |
|
17 | |||
20 | YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+') |
|
18 | REGEX_YOUTUBE_URL = re.compile(r'https?://((www\.)?youtube\.com/watch\?v=|youtu.be/)[-\w]+') | |
|
19 | REGEX_MAGNET = re.compile(r'magnet:\?xt=urn:(btih:)?[a-z0-9]{20,50}.*') | |||
21 |
|
20 | |||
22 | TYPE_URL_ONLY = ( |
|
21 | TYPE_URL_ONLY = ( | |
23 | 'application/xhtml+xml', |
|
22 | 'application/xhtml+xml', | |
@@ -80,15 +79,18 b' class YouTubeDownloader(Downloader):' | |||||
80 |
|
79 | |||
81 | @staticmethod |
|
80 | @staticmethod | |
82 | def handles(url: str) -> bool: |
|
81 | def handles(url: str) -> bool: | |
83 | return YOUTUBE_URL.match(url) |
|
82 | return REGEX_YOUTUBE_URL.match(url) is not None | |
84 |
|
83 | |||
85 |
|
84 | |||
86 | class NothingDownloader(Downloader): |
|
85 | class NothingDownloader(Downloader): | |
87 | @staticmethod |
|
86 | @staticmethod | |
88 | def handles(url: str) -> bool: |
|
87 | def handles(url: str) -> bool: | |
|
88 | if REGEX_MAGNET.match(url) or REGEX_YOUTUBE_URL.match(url): | |||
|
89 | return True | |||
|
90 | ||||
89 | response_head = requests.head(url, verify=False) |
|
91 | response_head = requests.head(url, verify=False) | |
90 | content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0] |
|
92 | content_type = response_head.headers[HEADER_CONTENT_TYPE].split(';')[0] | |
91 | return content_type in TYPE_URL_ONLY |
|
93 | return content_type in TYPE_URL_ONLY | |
92 |
|
94 | |||
93 | @staticmethod |
|
95 | @staticmethod | |
94 | def download(url: str): |
|
96 | def download(url: str): |
@@ -168,21 +168,24 b' class UrlViewer(AbstractViewer):' | |||||
168 | def get_view(self): |
|
168 | def get_view(self): | |
169 | return '<div class="image">' \ |
|
169 | return '<div class="image">' \ | |
170 | '{}' \ |
|
170 | '{}' \ | |
171 | '</div>'.format(self.get_format_view()) |
|
171 | '<div class="image-metadata">{}</div>' \ | |
|
172 | '</div>'.format(self.get_format_view(), get_domain(self.url)) | |||
172 |
|
173 | |||
173 | def get_format_view(self): |
|
174 | def get_format_view(self): | |
174 | protocol = self.url.split('://')[0] |
|
175 | protocol = self.url.split('://')[0] | |
175 | full_domain = self.url.split('/')[2] |
|
176 | ||
176 | domain = get_domain(full_domain) |
|
177 | domain = get_domain(self.url) | |
177 |
|
178 | |||
178 | if protocol in URL_PROTOCOLS: |
|
179 | if protocol in URL_PROTOCOLS: | |
179 | url_image_name = URL_PROTOCOLS.get(protocol) |
|
180 | url_image_name = URL_PROTOCOLS.get(protocol) | |
180 | else: |
|
181 | elif domain: | |
181 | filename = 'images/domains/{}.png'.format(domain) |
|
182 | filename = 'images/domains/{}.png'.format(domain) | |
182 | if file_exists(filename): |
|
183 | if file_exists(filename): | |
183 | url_image_name = 'domains/' + domain |
|
184 | url_image_name = 'domains/' + domain | |
184 | else: |
|
185 | else: | |
185 | url_image_name = FILE_STUB_URL |
|
186 | url_image_name = FILE_STUB_URL | |
|
187 | else: | |||
|
188 | url_image_name = FILE_STUB_URL | |||
186 |
|
189 | |||
187 | image_path = 'images/{}.png'.format(url_image_name) |
|
190 | image_path = 'images/{}.png'.format(url_image_name) | |
188 | image = static(image_path) |
|
191 | image = static(image_path) | |
@@ -191,3 +194,8 b' class UrlViewer(AbstractViewer):' | |||||
191 | return '<a href="{}">' \ |
|
194 | return '<a href="{}">' \ | |
192 | '<img class="url-image" src="{}" width="{}" height="{}"/>' \ |
|
195 | '<img class="url-image" src="{}" width="{}" height="{}"/>' \ | |
193 | '</a>'.format(self.url, image, w, h) |
|
196 | '</a>'.format(self.url, image, w, h) | |
|
197 | ||||
|
198 | ||||
|
199 | def _get_protocol(self): | |||
|
200 | pass | |||
|
201 |
@@ -153,21 +153,27 b' def get_domain(url: str) -> str:' | |||||
153 | """ |
|
153 | """ | |
154 | Gets domain from an URL with random number of domain levels. |
|
154 | Gets domain from an URL with random number of domain levels. | |
155 | """ |
|
155 | """ | |
156 |
|
|
156 | domain_parts = url.split('/') | |
157 |
if len( |
|
157 | if len(domain_parts) >= 2: | |
158 | return url |
|
158 | full_domain = domain_parts[2] | |
159 |
|
159 | else: | ||
160 | top = levels[-1] |
|
160 | full_domain = '' | |
161 | second = levels[-2] |
|
|||
162 |
|
161 | |||
163 | has_third_level = len(levels) > 2 |
|
162 | result = full_domain | |
164 | if has_third_level: |
|
163 | if full_domain: | |
165 | third = levels[-3] |
|
164 | levels = full_domain.split('.') | |
|
165 | if len(levels) >= 2: | |||
|
166 | top = levels[-1] | |||
|
167 | second = levels[-2] | |||
166 |
|
168 | |||
167 | if has_third_level and ('{}.{}'.format(second, top) in KNOWN_DOMAINS): |
|
169 | has_third_level = len(levels) > 2 | |
168 | result = '{}.{}.{}'.format(third, second, top) |
|
170 | if has_third_level: | |
169 | else: |
|
171 | third = levels[-3] | |
170 | result = '{}.{}'.format(second, top) |
|
172 | ||
|
173 | if has_third_level and ('{}.{}'.format(second, top) in KNOWN_DOMAINS): | |||
|
174 | result = '{}.{}.{}'.format(third, second, top) | |||
|
175 | else: | |||
|
176 | result = '{}.{}'.format(second, top) | |||
171 |
|
177 | |||
172 | return result |
|
178 | return result | |
173 |
|
179 |
General Comments 0
You need to be logged in to leave comments.
Login now