|
|
import feedparser
|
|
|
import logging
|
|
|
import calendar
|
|
|
|
|
|
from time import mktime
|
|
|
from datetime import datetime
|
|
|
|
|
|
from django.db import models, transaction
|
|
|
from django.utils.dateparse import parse_datetime
|
|
|
from django.utils.timezone import utc
|
|
|
from django.utils import timezone
|
|
|
from django.utils.html import strip_tags
|
|
|
|
|
|
from boards.models import Post
|
|
|
from boards.models.post import TITLE_MAX_LENGTH
|
|
|
from boards.utils import get_tripcode_from_text
|
|
|
from boards import settings
|
|
|
|
|
|
|
|
|
# Maximum number of characters of a source type identifier.
SOURCE_TYPE_MAX_LENGTH = 100

# Identifier of the RSS source type.
SOURCE_TYPE_RSS = 'RSS'

# (value, label) pairs of the available source types; the identifier is
# used for both the value and the label.
TYPE_CHOICES = ((SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),)
|
|
|
|
|
|
|
|
|
class ThreadSource(models.Model):
    """An external source whose new items are posted into a thread.

    Only the RSS source type is handled: the feed at ``source`` is parsed
    and every entry published after ``timestamp`` becomes a post in
    ``thread``.
    """

    class Meta:
        app_label = 'boards'

    # Name of the source; used as its string representation and in log lines.
    name = models.TextField()
    # Thread that receives the fetched items.
    thread = models.ForeignKey('Thread', on_delete=models.CASCADE)
    # Publication time of the newest item seen so far; only items newer
    # than this are posted by fetch_latest_posts().
    timestamp = models.DateTimeField()
    # Location passed to the fetcher (handed to feedparser.parse for RSS).
    source = models.TextField()
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Create posts for the items published since ``self.timestamp``.

        Nothing is fetched when the thread is archived. Otherwise the
        source is read, every item newer than the stored timestamp is
        posted to the thread (oldest first), and the stored timestamp is
        advanced to the newest item seen and saved.

        Feed entries without a parsed publication date cannot be ordered
        or compared with the stored timestamp, so they are logged as
        errors and skipped instead of aborting the whole fetch.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread {} is archived, please try another one'.format(self.thread))
            return

        tripcode = get_tripcode_from_text(
            settings.get('External', 'SourceFetcherTripcode'))
        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is {}'.format(start_timestamp))

        # A thread at its bumplimit is still posted to; this is only a hint
        # for the operator.
        if self.thread.is_bumplimit():
            logger.warning('The thread {} has reached its bumplimit, please create a new one'.format(self.thread))

        if self.source_type == SOURCE_TYPE_RSS:
            feed = feedparser.parse(self.source)

            # Filter out undated entries before sorting: a missing or None
            # published_parsed would otherwise make the sort itself fail.
            dated_items = []
            for entry in feed.entries:
                if entry.get('published_parsed'):
                    dated_items.append(entry)
                else:
                    logger.error('Invalid timestamp {} for {}'.format(
                        entry.get('published'), entry.get('title')))
            dated_items.sort(key=lambda entry: entry.published_parsed)

            for item in dated_items:
                title = self.strip_title(item.title, TITLE_MAX_LENGTH)
                # published_parsed is converted with timegm(), i.e. it is
                # interpreted as UTC, and turned into an aware datetime.
                timestamp = datetime.fromtimestamp(
                    calendar.timegm(item.published_parsed), tz=utc)

                if timestamp > last_timestamp:
                    last_timestamp = timestamp

                if timestamp > start_timestamp:
                    Post.objects.create_post(title=title, text=self.parse_text(item.description),
                                             thread=self.thread, file_urls=[item.link], tripcode=tripcode)
                    logger.info('Fetched item {} from {} into thread {}'.format(
                        title, self.name, self.thread))

        logger.info('New timestamp is {}'.format(last_timestamp))
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])

    def parse_text(self, text):
        """Return ``text`` with its HTML tags removed."""
        return strip_tags(text)

    def strip_title(self, title, max_length):
        """Return ``title`` shortened to at most ``max_length`` characters.

        A title longer than ``max_length`` is cut to ``max_length - 1``
        characters and an ellipsis is appended; shorter titles are returned
        unchanged.
        """
        result = title
        if len(title) > max_length:
            result = title[:max_length - 1] + '…'
        return result
|
|
|
|
|
|
|