import feedparser
import logging
import calendar

from time import mktime
from datetime import datetime

from django.db import models, transaction
from django.utils.dateparse import parse_datetime
from django.utils.timezone import utc
from django.utils import timezone
from django.utils.html import strip_tags

from boards.models import Post
from boards.models.post import TITLE_MAX_LENGTH
from boards.utils import get_tripcode_from_text
from boards import settings


SOURCE_TYPE_MAX_LENGTH = 100
SOURCE_TYPE_RSS = 'RSS'
TYPE_CHOICES = (
    (SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),
)


def _entry_timestamp(entry):
    """Return the entry's publication time as an aware UTC datetime.

    Returns None when the entry carries no parsed publication date, which
    is how a missing or unparseable date shows up on a feed entry.
    """
    published_parsed = getattr(entry, 'published_parsed', None)
    if not published_parsed:
        return None
    # published_parsed is a UTC time tuple, so timegm (not mktime) is the
    # correct inverse.
    return datetime.fromtimestamp(calendar.timegm(published_parsed), tz=utc)


class ThreadSource(models.Model):
    """An external source whose new items are posted into a thread."""

    class Meta:
        app_label = 'boards'

    # Human-readable name; also used as the string representation.
    name = models.TextField()
    # Thread that receives the posts created from fetched items.
    thread = models.ForeignKey('Thread')
    # Only items published after this moment are imported; it is advanced
    # to the newest item seen on every fetch.
    timestamp = models.DateTimeField()
    # Location handed to the fetcher (passed to feedparser for RSS).
    source = models.TextField()
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Creates new posts with the info fetched since the timestamp.

        Does nothing except log an error when the thread is archived.
        Otherwise every feed entry published after ``self.timestamp`` is
        turned into a post, oldest first, and ``self.timestamp`` is moved
        forward to the newest publication time seen and saved.

        Entries without a usable publication date are logged and skipped
        instead of aborting the whole fetch.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread {} is archived, please try another one'.format(self.thread))
            return

        tripcode = get_tripcode_from_text(
            settings.get('External', 'SourceFetcherTripcode'))
        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is {}'.format(start_timestamp))

        # A bumplimit is only reported; fetching still goes on.
        if self.thread.is_bumplimit():
            logger.warning('The thread {} has reached its bumplimit, please create a new one'.format(self.thread))

        if self.source_type == SOURCE_TYPE_RSS:
            feed = feedparser.parse(self.source)

            # Resolve timestamps before sorting so that an undated entry
            # cannot break the sort or the conversion.
            dated_items = []
            for entry in feed.entries:
                timestamp = _entry_timestamp(entry)
                if timestamp is None:
                    logger.error('Invalid timestamp {} for {}'.format(
                        getattr(entry, 'published', None),
                        getattr(entry, 'title', '')[:TITLE_MAX_LENGTH]))
                    continue
                dated_items.append((timestamp, entry))

            # Stable sort: entries with equal dates keep their feed order.
            dated_items.sort(key=lambda pair: pair[0])

            for timestamp, item in dated_items:
                title = item.title[:TITLE_MAX_LENGTH]

                if timestamp > last_timestamp:
                    last_timestamp = timestamp

                if timestamp > start_timestamp:
                    Post.objects.create_post(title=title,
                                             text=self.parse_text(item.description),
                                             thread=self.thread,
                                             file_urls=[item.link],
                                             tripcode=tripcode)
                    logger.info('Fetched item {} from {} into thread {}'.format(
                        title, self.name, self.thread))

        logger.info('New timestamp is {}'.format(last_timestamp))
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])

    def parse_text(self, text):
        """Return ``text`` with its HTML tags removed."""
        return strip_tags(text)