source.py
84 lines
| 3.2 KiB
| text/x-python
|
PythonLexer
neko259
|
r1968 | import feedparser | ||
import logging | ||||
neko259
|
r1970 | import calendar | ||
neko259
|
r1968 | |||
from time import mktime | ||||
from datetime import datetime | ||||
from django.db import models, transaction | ||||
from django.utils.dateparse import parse_datetime | ||||
from django.utils.timezone import utc | ||||
from django.utils import timezone | ||||
neko259
|
r1977 | from django.utils.html import strip_tags | ||
neko259
|
r1973 | |||
neko259
|
r1968 | from boards.models import Post | ||
from boards.models.post import TITLE_MAX_LENGTH | ||||
neko259
|
r1973 | from boards.utils import get_tripcode_from_text | ||
from boards import settings | ||||
neko259
|
r1968 | |||
# Maximum length of the ``source_type`` column.
SOURCE_TYPE_MAX_LENGTH = 100

# Identifier of the RSS source kind; used both as the stored value and as
# the human-readable label in TYPE_CHOICES.
SOURCE_TYPE_RSS = 'RSS'

# (value, label) pairs in the shape Django's ``choices`` option expects.
TYPE_CHOICES = (
    (SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),
)
class ThreadSource(models.Model):
    """An external feed whose new entries are posted into a thread."""

    class Meta:
        app_label = 'boards'

    # Display name; this is what __str__ returns and what log lines show.
    name = models.TextField()
    # Thread that receives the fetched posts; rows are removed with it.
    thread = models.ForeignKey('Thread', on_delete=models.CASCADE)
    # Publication time of the newest entry seen so far; only entries
    # published after it are posted by fetch_latest_posts().
    timestamp = models.DateTimeField()
    # Location handed to feedparser.parse() for RSS sources.
    source = models.TextField()
    # Kind of the source, one of TYPE_CHOICES.
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        """Return the source name."""
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Creates new posts with the info fetched since the timestamp.

        Nothing is fetched when the thread is archived. Otherwise every
        feed entry published after ``self.timestamp`` becomes a post in
        the thread, and ``self.timestamp`` is advanced to the newest
        publication time seen and saved.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread {} is archived, please try another one'.format(self.thread))
            return

        tripcode = get_tripcode_from_text(
            settings.get('External', 'SourceFetcherTripcode'))

        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is {}'.format(start_timestamp))

        # Reaching the bumplimit is only reported; posts are still created.
        if self.thread.is_bumplimit():
            logger.warning('The thread {} has reached its bumplimit, please create a new one'.format(self.thread))

        if self.source_type == SOURCE_TYPE_RSS:
            last_timestamp = self._fetch_rss_posts(
                logger, tripcode, start_timestamp)

        logger.info('New timestamp is {}'.format(last_timestamp))
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])

    def _fetch_rss_posts(self, logger, tripcode, start_timestamp):
        """Post RSS entries newer than ``start_timestamp`` into the thread.

        Entries without a parsed publication date are logged and skipped.
        Returns the newest publication time seen, or ``start_timestamp``
        when no entry is newer.
        """
        feed = feedparser.parse(self.source)

        # Entries lacking a parsed date can be neither ordered nor compared
        # with the stored timestamp, so set them aside before sorting.
        dated_items = []
        for item in feed.entries:
            if getattr(item, 'published_parsed', None) is None:
                logger.error('Invalid timestamp {} for {}'.format(
                    getattr(item, 'published', None),
                    getattr(item, 'title', None)))
            else:
                dated_items.append(item)

        # Oldest first, so posts appear in the thread chronologically.
        dated_items.sort(key=lambda entry: entry.published_parsed)

        last_timestamp = start_timestamp
        for item in dated_items:
            title = self.strip_title(item.title, TITLE_MAX_LENGTH)
            # published_parsed is a UTC time tuple, hence timegm() rather
            # than mktime(), which would interpret it as local time.
            timestamp = datetime.fromtimestamp(
                calendar.timegm(item.published_parsed), tz=utc)

            if timestamp > last_timestamp:
                last_timestamp = timestamp

            if timestamp > start_timestamp:
                Post.objects.create_post(
                    title=title, text=self.parse_text(item.description),
                    thread=self.thread, file_urls=[item.link],
                    tripcode=tripcode)
                logger.info('Fetched item {} from {} into thread {}'.format(
                    title, self.name, self.thread))

        return last_timestamp

    def parse_text(self, text):
        """Return ``text`` with HTML tags removed."""
        return strip_tags(text)

    def strip_title(self, title, max_length):
        """Shorten ``title`` to ``max_length`` characters.

        A title longer than ``max_length`` is cut to ``max_length - 1``
        characters and an ellipsis is appended; shorter titles are
        returned unchanged.
        """
        if len(title) > max_length:
            return title[:max_length - 1] + '…'
        return title