##// END OF EJS Templates
Serialize timestamp before passing to tag. Fail if the passed timestamp is invalid and cannot be parsed
Serialize timestamp before passing to tag. Fail if the passed timestamp is invalid and cannot be parsed

File last commit:

r2008:5d3c1f7e default
r2122:edce8526 default
Show More
source.py
85 lines | 3.2 KiB | text/x-python | PythonLexer
import feedparser
import logging
import calendar
from time import mktime
from datetime import datetime
from django.db import models, transaction
from django.utils.dateparse import parse_datetime
from django.utils.timezone import utc
from django.utils import timezone
from django.utils.html import strip_tags
from boards.models import Post
from boards.models.post import TITLE_MAX_LENGTH
from boards.settings import SECTION_EXTERNAL
from boards.utils import get_tripcode_from_text
from boards import settings
# Upper bound on the length of a stored source type identifier.
SOURCE_TYPE_MAX_LENGTH = 100

# Identifier of the only supported source type: an RSS feed.
SOURCE_TYPE_RSS = 'RSS'

# (value, label) pairs offered as choices for the source type field.
TYPE_CHOICES = ((SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),)
class ThreadSource(models.Model):
    """External feed whose new items are posted into a thread.

    A source is bound to one thread and remembers the publication time of
    the newest item it has already seen, so that each call to
    ``fetch_latest_posts`` only imports items published after that point.
    """

    class Meta:
        app_label = 'boards'

    # Human-readable name; used as the string representation and in log lines.
    name = models.TextField()
    # Thread that receives the posts created from fetched items.
    thread = models.ForeignKey('Thread', on_delete=models.CASCADE)
    # Publication time of the newest item seen so far; only later items
    # are turned into posts.
    timestamp = models.DateTimeField()
    # Location handed to ``feedparser.parse`` for RSS sources.
    source = models.TextField()
    # One of TYPE_CHOICES; selects how ``source`` is fetched.
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Create posts for the items published since the stored timestamp.

        If the thread is archived, an error is logged and nothing else
        happens. Otherwise every feed entry newer than ``self.timestamp``
        becomes a post in the thread, and ``self.timestamp`` is advanced to
        the publication time of the newest entry seen and saved.

        Entries without a parsed publication date are logged as errors and
        skipped instead of aborting the whole fetch.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread {} is archived, please try another one'.format(self.thread))
            return

        tripcode = get_tripcode_from_text(
            settings.get(SECTION_EXTERNAL, 'SourceFetcherTripcode'))

        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is {}'.format(start_timestamp))

        # Reaching the bumplimit is only reported; fetching still proceeds.
        if self.thread.is_bumplimit():
            logger.warning('The thread {} has reached its bumplimit, please create a new one'.format(self.thread))

        if self.source_type == SOURCE_TYPE_RSS:
            for item in self._dated_rss_entries(logger):
                title = self.strip_title(item.title, TITLE_MAX_LENGTH)
                # timegm() interprets the struct_time as UTC, so the result
                # is an aware datetime comparable with the stored timestamp.
                # NOTE(review): django.utils.timezone.utc was removed in
                # Django 5.0 -- confirm the targeted Django version.
                timestamp = datetime.fromtimestamp(
                    calendar.timegm(item.published_parsed), tz=utc)

                if timestamp > last_timestamp:
                    last_timestamp = timestamp

                if timestamp > start_timestamp:
                    Post.objects.create_post(
                        title=title, text=self.parse_text(item.description),
                        thread=self.thread, file_urls=[item.link],
                        tripcode=tripcode)
                    logger.info('Fetched item {} from {} into thread {}'.format(
                        title, self.name, self.thread))

        logger.info('New timestamp is {}'.format(last_timestamp))
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])

    def _dated_rss_entries(self, logger):
        """Return the feed entries that have a parsed date, oldest first.

        Entries whose ``published_parsed`` is missing or ``None`` cannot be
        ordered or compared with the stored timestamp; they are reported
        through ``logger`` and left out of the result.
        """
        feed = feedparser.parse(self.source)

        dated = []
        for entry in feed.entries:
            if getattr(entry, 'published_parsed', None) is None:
                logger.error('Invalid timestamp {} for {}'.format(
                    getattr(entry, 'published', None),
                    self.strip_title(getattr(entry, 'title', ''),
                                     TITLE_MAX_LENGTH)))
            else:
                dated.append(entry)

        dated.sort(key=lambda entry: entry.published_parsed)
        return dated

    def parse_text(self, text):
        """Return ``text`` with HTML tags removed."""
        return strip_tags(text)

    def strip_title(self, title, max_length):
        """Return ``title`` shortened to at most ``max_length`` characters.

        A title that is too long is cut and terminated with an ellipsis
        character, which counts towards the limit.
        """
        result = title
        if len(title) > max_length:
            result = title[:max_length - 1] + '…'
        return result