Show More
@@ -1,74 +1,77 b'' | |||||
1 | import feedparser |
|
1 | import feedparser | |
2 | import logging |
|
2 | import logging | |
3 | import calendar |
|
3 | import calendar | |
4 |
|
4 | |||
5 | from time import mktime |
|
5 | from time import mktime | |
6 | from datetime import datetime |
|
6 | from datetime import datetime | |
7 |
|
7 | |||
8 | from django.db import models, transaction |
|
8 | from django.db import models, transaction | |
9 | from django.utils.dateparse import parse_datetime |
|
9 | from django.utils.dateparse import parse_datetime | |
10 | from django.utils.timezone import utc |
|
10 | from django.utils.timezone import utc | |
11 | from django.utils import timezone |
|
11 | from django.utils import timezone | |
|
12 | from django.utils.html import strip_tags | |||
12 |
|
13 | |||
13 | from boards.models import Post |
|
14 | from boards.models import Post | |
14 | from boards.models.post import TITLE_MAX_LENGTH |
|
15 | from boards.models.post import TITLE_MAX_LENGTH | |
15 | from boards.utils import get_tripcode_from_text |
|
16 | from boards.utils import get_tripcode_from_text | |
16 | from boards import settings |
|
17 | from boards import settings | |
17 |
|
18 | |||
18 |
|
19 | |||
# Upper bound on the stored length of a source type identifier; used as the
# max_length of the source_type field.
SOURCE_TYPE_MAX_LENGTH = 100

# Identifier of the RSS source kind, the only kind listed in TYPE_CHOICES.
SOURCE_TYPE_RSS = 'RSS'

# (value, label) pairs offered for the source_type field; the identifier
# doubles as its own label.
TYPE_CHOICES = ((SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),)
24 |
|
25 | |||
25 |
|
26 | |||
class ThreadSource(models.Model):
    """External source whose new items are copied into a thread as posts.

    Each source remembers the publication time of the newest item it has
    already seen (``timestamp``), so that a later fetch only creates posts
    for items published after that moment.
    """

    class Meta:
        app_label = 'boards'

    # Human-readable name of the source; also its string representation.
    name = models.TextField()
    # Thread that receives the posts created from fetched items.
    thread = models.ForeignKey('Thread')
    # Publication time of the newest item processed so far.
    timestamp = models.DateTimeField()
    # Location of the source; for RSS it is handed to feedparser.parse().
    source = models.TextField()
    # Kind of the source, one of TYPE_CHOICES.
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Creates new posts with the info fetched since the timestamp.

        Nothing is fetched when the thread is archived. A thread that has
        reached its bumplimit only produces a warning; items are still
        posted into it. Feed entries without a parsed publication date are
        logged and skipped. After the run ``timestamp`` is advanced to the
        newest publication date seen and saved.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread %s is archived, please try another one',
                         self.thread)
            return

        tripcode = get_tripcode_from_text(
            settings.get('External', 'SourceFetcherTripcode'))
        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is %s', start_timestamp)
        if self.thread.is_bumplimit():
            logger.warning('The thread %s has reached its bumplimit, '
                           'please create a new one', self.thread)

        if self.source_type == SOURCE_TYPE_RSS:
            feed = feedparser.parse(self.source)

            # Entries with a missing or unparseable date cannot be ordered
            # or compared with the stored timestamp, so they are reported
            # and dropped before sorting instead of aborting the whole run.
            dated_items = []
            for entry in feed.entries:
                if entry.get('published_parsed'):
                    dated_items.append(entry)
                else:
                    logger.error('Invalid timestamp %s for %s',
                                 entry.get('published'),
                                 entry.get('title', '')[:TITLE_MAX_LENGTH])

            # Oldest first, so posts appear in publication order.
            dated_items.sort(key=lambda entry: entry.published_parsed)

            for item in dated_items:
                title = item.title[:TITLE_MAX_LENGTH]
                # published_parsed is converted with timegm (UTC based) to
                # build an aware datetime comparable with the stored one.
                timestamp = datetime.fromtimestamp(
                    calendar.timegm(item.published_parsed), tz=utc)
                if timestamp > last_timestamp:
                    last_timestamp = timestamp
                if timestamp > start_timestamp:
                    Post.objects.create_post(
                        title=title,
                        text=self.parse_text(item.description),
                        thread=self.thread, file_urls=[item.link],
                        tripcode=tripcode)
                    logger.info('Fetched item %s from %s into thread %s',
                                title, self.name, self.thread)

        logger.info('New timestamp is %s', last_timestamp)
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])

    def parse_text(self, text):
        """Returns the item text with its markup tags stripped."""
        return strip_tags(text)
General Comments 0
You need to be logged in to leave comments.
Login now