##// END OF EJS Templates
Strip HTML tags from the fetched source's description
neko259 -
r1977:7b889029 default
parent child Browse files
Show More
@@ -1,74 +1,77 b''
1 1 import feedparser
2 2 import logging
3 3 import calendar
4 4
5 5 from time import mktime
6 6 from datetime import datetime
7 7
8 8 from django.db import models, transaction
9 9 from django.utils.dateparse import parse_datetime
10 10 from django.utils.timezone import utc
11 11 from django.utils import timezone
12 from django.utils.html import strip_tags
12 13
13 14 from boards.models import Post
14 15 from boards.models.post import TITLE_MAX_LENGTH
15 16 from boards.utils import get_tripcode_from_text
16 17 from boards import settings
17 18
18 19
# Maximum length of the ``source_type`` column on ThreadSource.
19 20 SOURCE_TYPE_MAX_LENGTH = 100
# Identifier of the RSS source type; the only type listed in TYPE_CHOICES.
20 21 SOURCE_TYPE_RSS = 'RSS'
# Choices for ``ThreadSource.source_type``. The stored value and the
# human-readable label are the same string.
21 22 TYPE_CHOICES = (
22 23 (SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),
23 24 )
24 25
25 26
class ThreadSource(models.Model):
    """An external feed whose new entries are mirrored into a thread as posts.

    ``fetch_latest_posts`` reads the feed at ``source`` and creates one post
    in ``thread`` for every entry published after ``timestamp``, then moves
    ``timestamp`` forward to the newest publication time it has seen.
    """

    class Meta:
        app_label = 'boards'

    # Display name; returned by __str__ and used in log messages.
    name = models.TextField()
    # Thread that receives the posts created from the feed entries.
    thread = models.ForeignKey('Thread')
    # Publication time of the newest entry already processed; only entries
    # published strictly after it produce posts.
    timestamp = models.DateTimeField()
    # Feed location handed to feedparser.parse().
    source = models.TextField()
    # One of TYPE_CHOICES; selects how ``source`` is fetched.
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Create posts for the entries published since ``self.timestamp``.

        Nothing is fetched when the thread is archived. A thread that has
        reached its bumplimit is only reported with a warning; fetching
        still proceeds. Entries without a parsed publication date are logged
        and skipped. After processing, ``self.timestamp`` is set to the
        newest publication time seen and saved.

        The whole method runs in one transaction, so an exception raised
        while creating a post rolls back every post created by this call.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread {} is archived, please try another one'.format(self.thread))
            return

        tripcode = get_tripcode_from_text(
            settings.get('External', 'SourceFetcherTripcode'))
        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is {}'.format(start_timestamp))

        if self.thread.is_bumplimit():
            logger.warning('The thread {} has reached its bumplimit, please create a new one'.format(self.thread))

        if self.source_type == SOURCE_TYPE_RSS:
            feed = feedparser.parse(self.source)

            # An entry whose date is missing or unparsable has no usable
            # ``published_parsed``. Such entries must be dropped before
            # sorting, otherwise the sort key lookup or comparison raises.
            dated_items = []
            for item in feed.entries:
                if getattr(item, 'published_parsed', None) is None:
                    logger.error('Invalid timestamp {} for {}'.format(
                        getattr(item, 'published', None),
                        getattr(item, 'title', '')[:TITLE_MAX_LENGTH]))
                else:
                    dated_items.append(item)

            # Oldest first, so posts are created in publication order.
            dated_items.sort(key=lambda entry: entry.published_parsed)

            for item in dated_items:
                title = item.title[:TITLE_MAX_LENGTH]
                # ``published_parsed`` is a UTC time tuple, hence timegm()
                # rather than the local-time mktime().
                timestamp = datetime.fromtimestamp(
                    calendar.timegm(item.published_parsed), tz=utc)

                if timestamp > last_timestamp:
                    last_timestamp = timestamp
                if timestamp > start_timestamp:
                    Post.objects.create_post(
                        title=title,
                        text=self.parse_text(item.description),
                        thread=self.thread,
                        file_urls=[item.link],
                        tripcode=tripcode)
                    logger.info('Fetched item {} from {} into thread {}'.format(
                        title, self.name, self.thread))

        logger.info('New timestamp is {}'.format(last_timestamp))
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])

    def parse_text(self, text):
        """Return ``text`` with its HTML tags removed via ``strip_tags``."""
        return strip_tags(text)
General Comments 0
You need to be logged in to leave comments. Login now