Show More
@@ -1,74 +1,77 b'' | |||
|
1 | 1 | import feedparser |
|
2 | 2 | import logging |
|
3 | 3 | import calendar |
|
4 | 4 | |
|
5 | 5 | from time import mktime |
|
6 | 6 | from datetime import datetime |
|
7 | 7 | |
|
8 | 8 | from django.db import models, transaction |
|
9 | 9 | from django.utils.dateparse import parse_datetime |
|
10 | 10 | from django.utils.timezone import utc |
|
11 | 11 | from django.utils import timezone |
|
12 | from django.utils.html import strip_tags | |
|
12 | 13 | |
|
13 | 14 | from boards.models import Post |
|
14 | 15 | from boards.models.post import TITLE_MAX_LENGTH |
|
15 | 16 | from boards.utils import get_tripcode_from_text |
|
16 | 17 | from boards import settings |
|
17 | 18 | |
|
18 | 19 | |
|
# Upper bound for the length of the stored ``source_type`` value.
SOURCE_TYPE_MAX_LENGTH = 100

# Identifier of the only source kind defined here: an RSS feed.
SOURCE_TYPE_RSS = 'RSS'

# (stored value, displayed label) pairs offered for ``source_type``.
TYPE_CHOICES = ((SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),)
|
24 | 25 | |
|
25 | 26 | |
|
class ThreadSource(models.Model):
    """External source whose new entries are mirrored as posts in a thread.

    Fields:
        name: label of the source; also its string representation.
        thread: the thread that receives the fetched posts.
        timestamp: publication time of the newest entry seen so far; only
            entries published after it are turned into posts.
        source: location of the source, passed to the feed parser as-is.
        source_type: kind of the source, one of ``TYPE_CHOICES``.
    """

    class Meta:
        app_label = 'boards'

    name = models.TextField()
    # CASCADE is what Django applied implicitly before ``on_delete`` became
    # a required argument, so spelling it out keeps the behaviour unchanged.
    thread = models.ForeignKey('Thread', on_delete=models.CASCADE)
    timestamp = models.DateTimeField()
    source = models.TextField()
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        """Return the name of the source."""
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Create posts from the entries published since ``self.timestamp``.

        When the thread is archived only an error is logged. Otherwise, for
        an RSS source, every feed entry published after the stored timestamp
        becomes a post in the thread (oldest first), and the stored timestamp
        is advanced to the newest publication time seen. Entries without a
        parsed publication date are logged and skipped instead of aborting
        the whole fetch.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread {} is archived, please try another one'.format(self.thread))
            return

        tripcode = get_tripcode_from_text(
            settings.get('External', 'SourceFetcherTripcode'))
        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is {}'.format(start_timestamp))

        # A thread at its bumplimit still receives posts; this only warns.
        if self.thread.is_bumplimit():
            logger.warning('The thread {} has reached its bumplimit, please create a new one'.format(self.thread))

        if self.source_type == SOURCE_TYPE_RSS:
            feed = feedparser.parse(self.source)

            # Entries lacking a parsed date can neither be ordered nor
            # compared with the stored timestamp, so set them aside first.
            dated_entries = []
            for entry in feed.entries:
                if entry.get('published_parsed'):
                    dated_entries.append(entry)
                else:
                    logger.error('Invalid timestamp {} for {}'.format(
                        entry.get('published'),
                        entry.get('title', '')[:TITLE_MAX_LENGTH]))
            dated_entries.sort(key=lambda entry: entry.published_parsed)

            for item in dated_entries:
                title = item.title[:TITLE_MAX_LENGTH]
                # ``published_parsed`` is a UTC time tuple, hence ``timegm``.
                timestamp = datetime.fromtimestamp(
                    calendar.timegm(item.published_parsed), tz=utc)

                if timestamp > last_timestamp:
                    last_timestamp = timestamp

                if timestamp > start_timestamp:
                    Post.objects.create_post(
                        title=title, text=self.parse_text(item.description),
                        thread=self.thread, file_urls=[item.link],
                        tripcode=tripcode)
                    logger.info('Fetched item {} from {} into thread {}'.format(
                        title, self.name, self.thread))

        logger.info('New timestamp is {}'.format(last_timestamp))
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])

    def parse_text(self, text):
        """Return ``text`` with its HTML tags stripped."""
        return strip_tags(text)
General Comments 0
You need to be logged in to leave comments.
Login now