##// END OF EJS Templates
Use proper settings for max landing threads. Show thread last update time instead of number of posts
Use proper settings for max landing threads. Show thread last update time instead of number of posts

File last commit:

r1986:0b41439a default
r2001:6d66389f default
Show More
source.py
84 lines | 3.2 KiB | text/x-python | PythonLexer
import feedparser
import logging
import calendar
from time import mktime
from datetime import datetime
from django.db import models, transaction
from django.utils.dateparse import parse_datetime
from django.utils.timezone import utc
from django.utils import timezone
from django.utils.html import strip_tags
from boards.models import Post
from boards.models.post import TITLE_MAX_LENGTH
from boards.utils import get_tripcode_from_text
from boards import settings
# Maximum number of characters stored for a source type identifier.
SOURCE_TYPE_MAX_LENGTH = 100

# Identifier of the RSS source type (the only type defined here).
SOURCE_TYPE_RSS = 'RSS'

# Choices for the source type field; the identifier doubles as its label.
TYPE_CHOICES = ((SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),)
class ThreadSource(models.Model):
    """External source whose new items are imported as posts into a thread.

    ``timestamp`` holds the publication time of the newest item already
    processed; ``fetch_latest_posts`` imports only items published after it
    and then moves it forward.
    """

    class Meta:
        app_label = 'boards'

    # Label of the source; also used as its string representation.
    name = models.TextField()
    # Thread that receives the imported posts.
    thread = models.ForeignKey('Thread', on_delete=models.CASCADE)
    # Publication time of the newest item seen so far.
    timestamp = models.DateTimeField()
    # Location of the source; for RSS it is passed to feedparser.parse().
    source = models.TextField()
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Creates new posts with the info fetched since the timestamp.

        When the target thread is archived, an error is logged and nothing
        else happens. Otherwise every item published after ``self.timestamp``
        becomes a post in ``self.thread``, after which ``self.timestamp`` is
        set to the newest publication time seen and saved. The whole run is
        wrapped in ``transaction.atomic``.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread %s is archived, please try another one',
                         self.thread)
            return

        tripcode = get_tripcode_from_text(
            settings.get('External', 'SourceFetcherTripcode'))
        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is %s', start_timestamp)

        if self.thread.is_bumplimit():
            # Only a warning: items are still imported past the bumplimit.
            logger.warning('The thread %s has reached its bumplimit, '
                           'please create a new one', self.thread)

        if self.source_type == SOURCE_TYPE_RSS:
            last_timestamp = self._import_rss_items(
                start_timestamp, tripcode, logger)

        logger.info('New timestamp is %s', last_timestamp)
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])

    def _import_rss_items(self, start_timestamp, tripcode, logger):
        """Post every RSS item newer than ``start_timestamp``.

        Returns the newest publication time seen, or ``start_timestamp``
        when no item is newer than it.
        """
        feed = feedparser.parse(self.source)

        # Items without a parsed publication date can be neither ordered nor
        # compared with the stored timestamp, so they are reported and
        # skipped up front instead of crashing the sort or the conversion.
        dated_items = []
        for item in feed.entries:
            published = item.get('published_parsed')
            if published is None:
                logger.error(
                    'Invalid timestamp %s for %s',
                    item.get('published'),
                    self.strip_title(item.get('title', ''), TITLE_MAX_LENGTH))
                continue
            dated_items.append((published, item))

        # Oldest first, so posts are created in publication order.
        dated_items.sort(key=lambda pair: pair[0])

        last_timestamp = start_timestamp
        for published, item in dated_items:
            # timegm() reads the struct_time as UTC, hence the UTC tzinfo.
            timestamp = datetime.fromtimestamp(calendar.timegm(published),
                                               tz=utc)
            if timestamp <= start_timestamp:
                # Already imported by an earlier run.
                continue

            last_timestamp = max(last_timestamp, timestamp)
            title = self.strip_title(item.title, TITLE_MAX_LENGTH)
            Post.objects.create_post(
                title=title, text=self.parse_text(item.description),
                thread=self.thread, file_urls=[item.link], tripcode=tripcode)
            logger.info('Fetched item %s from %s into thread %s',
                        title, self.name, self.thread)

        return last_timestamp

    def parse_text(self, text):
        """Return ``text`` with its HTML tags removed."""
        return strip_tags(text)

    def strip_title(self, title, max_length):
        """Return ``title`` cut to at most ``max_length`` characters.

        A title that is too long is truncated and ends with an ellipsis
        character, which counts towards ``max_length``.
        """
        if len(title) <= max_length:
            return title
        return title[:max_length - 1] + '…'