Show More
@@ -1,77 +1,84 b'' | |||||
1 | import feedparser |
|
1 | import feedparser | |
2 | import logging |
|
2 | import logging | |
3 | import calendar |
|
3 | import calendar | |
4 |
|
4 | |||
5 | from time import mktime |
|
5 | from time import mktime | |
6 | from datetime import datetime |
|
6 | from datetime import datetime | |
7 |
|
7 | |||
8 | from django.db import models, transaction |
|
8 | from django.db import models, transaction | |
9 | from django.utils.dateparse import parse_datetime |
|
9 | from django.utils.dateparse import parse_datetime | |
10 | from django.utils.timezone import utc |
|
10 | from django.utils.timezone import utc | |
11 | from django.utils import timezone |
|
11 | from django.utils import timezone | |
12 | from django.utils.html import strip_tags |
|
12 | from django.utils.html import strip_tags | |
13 |
|
13 | |||
14 | from boards.models import Post |
|
14 | from boards.models import Post | |
15 | from boards.models.post import TITLE_MAX_LENGTH |
|
15 | from boards.models.post import TITLE_MAX_LENGTH | |
16 | from boards.utils import get_tripcode_from_text |
|
16 | from boards.utils import get_tripcode_from_text | |
17 | from boards import settings |
|
17 | from boards import settings | |
18 |
|
18 | |||
19 |
|
19 | |||
# Maximum length of the ``source_type`` column.
SOURCE_TYPE_MAX_LENGTH = 100

# Identifier of the only source type currently supported.
SOURCE_TYPE_RSS = 'RSS'

# (stored value, human-readable label) pairs offered for ``source_type``.
TYPE_CHOICES = (
    (SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),
)
25 |
|
25 | |||
26 |
|
26 | |||
class ThreadSource(models.Model):
    """An external feed whose new items are posted into a thread.

    Only RSS sources are handled at the moment (see TYPE_CHOICES).
    """

    class Meta:
        app_label = 'boards'

    # Display name; returned by __str__ and used in log messages.
    name = models.TextField()
    # Thread that receives the posts created from fetched items.
    thread = models.ForeignKey('Thread')
    # Publication time of the newest item seen so far; only items published
    # after it are posted by fetch_latest_posts().
    timestamp = models.DateTimeField()
    # Value handed to feedparser.parse() (presumably the feed URL).
    source = models.TextField()
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Creates new posts with the info fetched since the timestamp.

        Does nothing but log an error when the thread is archived. Otherwise
        every feed entry published after the stored timestamp becomes a post
        in the thread, and the stored timestamp is advanced to the newest
        publication time seen. Entries without a parsed publication date are
        logged as errors and skipped.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread {} is archived, please try another one'.format(self.thread))
            return

        tripcode = get_tripcode_from_text(
            settings.get('External', 'SourceFetcherTripcode'))
        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is {}'.format(start_timestamp))
        if self.thread.is_bumplimit():
            # Only a warning: items are still posted into the thread below.
            logger.warning('The thread {} has reached its bumplimit, please create a new one'.format(self.thread))

        if self.source_type == SOURCE_TYPE_RSS:
            feed = feedparser.parse(self.source)

            # Entries lacking a parsed publication date cannot be ordered or
            # compared with the stored timestamp, so report and drop them
            # before sorting instead of crashing on the missing attribute.
            dated_items = []
            for item in feed.entries:
                if getattr(item, 'published_parsed', None):
                    dated_items.append(item)
                else:
                    logger.error('Invalid timestamp {} for {}'.format(
                        getattr(item, 'published', None),
                        getattr(item, 'title', None)))
            # Oldest first, so posts appear in publication order.
            dated_items.sort(key=lambda entry: entry.published_parsed)

            for item in dated_items:
                title = self.strip_title(item.title, TITLE_MAX_LENGTH)
                # published_parsed is a UTC struct_time; timegm() converts it
                # to epoch seconds without applying the local timezone.
                timestamp = datetime.fromtimestamp(
                    calendar.timegm(item.published_parsed), tz=utc)
                if timestamp > last_timestamp:
                    last_timestamp = timestamp
                if timestamp > start_timestamp:
                    Post.objects.create_post(
                        title=title, text=self.parse_text(item.description),
                        thread=self.thread, file_urls=[item.link],
                        tripcode=tripcode)
                    logger.info('Fetched item {} from {} into thread {}'.format(
                        title, self.name, self.thread))

        logger.info('New timestamp is {}'.format(last_timestamp))
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])

    def parse_text(self, text):
        """Returns the text with HTML tags removed."""
        return strip_tags(text)

    def strip_title(self, title, max_length):
        """Shortens the title to at most max_length characters.

        A title that is too long is cut and ends with an ellipsis character,
        which counts towards the limit. A non-positive max_length yields an
        empty string.
        """
        if max_length <= 0:
            return ''
        if len(title) <= max_length:
            return title
        return title[:max_length - 1] + '…'
|
84 |
General Comments 0
You need to be logged in to leave comments.
Login now