##// END OF EJS Templates
Added domain image for bitbucket
Added domain image for bitbucket

File last commit:

r2008:5d3c1f7e default
r2058:eaed6ee1 default
Show More
source.py
85 lines | 3.2 KiB | text/x-python | PythonLexer
import feedparser
import logging
import calendar
from time import mktime
from datetime import datetime
from django.db import models, transaction
from django.utils.dateparse import parse_datetime
from django.utils.timezone import utc
from django.utils import timezone
from django.utils.html import strip_tags
from boards.models import Post
from boards.models.post import TITLE_MAX_LENGTH
from boards.settings import SECTION_EXTERNAL
from boards.utils import get_tripcode_from_text
from boards import settings
# Maximum number of characters in a stored source type identifier.
SOURCE_TYPE_MAX_LENGTH = 100

# Identifier of the RSS source type.
SOURCE_TYPE_RSS = 'RSS'

# (value, label) pairs of the available source types; the identifier doubles
# as its own label.
TYPE_CHOICES = ((SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),)
class ThreadSource(models.Model):
    """External source whose new items are imported as posts into a thread.

    Only the RSS source type is handled by fetch_latest_posts(); the
    timestamp field records the newest item already seen so that each fetch
    only posts newer items.
    """

    class Meta:
        app_label = 'boards'

    # Name of the source; used as the string form and in log messages.
    name = models.TextField()
    # Thread that receives the posts created from fetched items.
    thread = models.ForeignKey('Thread', on_delete=models.CASCADE)
    # Publication time of the newest item seen by the previous fetch.
    timestamp = models.DateTimeField()
    # Value passed to feedparser.parse() for RSS sources.
    source = models.TextField()
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Creates new posts with the info fetched since the timestamp.

        Does nothing except log an error when the thread is archived.
        Otherwise, for an RSS source, every feed entry published after the
        stored timestamp becomes a post in the thread (oldest first), and the
        stored timestamp is advanced to the newest publication time seen.
        Entries without a usable publication time are logged and skipped.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread {} is archived, please try another one'.format(self.thread))
            return

        tripcode = get_tripcode_from_text(
            settings.get(SECTION_EXTERNAL, 'SourceFetcherTripcode'))
        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is {}'.format(start_timestamp))

        if self.thread.is_bumplimit():
            # Only a warning: fetching still proceeds for such threads.
            logger.warning('The thread {} has reached its bumplimit, please create a new one'.format(self.thread))

        if self.source_type == SOURCE_TYPE_RSS:
            feed = feedparser.parse(self.source)

            # Resolve timestamps before sorting so that an entry lacking a
            # publication time is reported and skipped instead of breaking
            # the sort (and thereby the whole fetch).
            dated_items = []
            for item in feed.entries:
                timestamp = self._get_published_timestamp(item)
                if timestamp is None:
                    logger.error('Invalid timestamp {} for {}'.format(
                        getattr(item, 'published', None),
                        self.strip_title(getattr(item, 'title', ''), TITLE_MAX_LENGTH)))
                else:
                    dated_items.append((timestamp, item))

            # Oldest first, so posts are created in publication order; the
            # sort is stable, so equal timestamps keep their feed order.
            dated_items.sort(key=lambda dated: dated[0])

            for timestamp, item in dated_items:
                if timestamp > last_timestamp:
                    last_timestamp = timestamp

                if timestamp > start_timestamp:
                    title = self.strip_title(item.title, TITLE_MAX_LENGTH)
                    Post.objects.create_post(title=title, text=self.parse_text(item.description),
                                             thread=self.thread, file_urls=[item.link], tripcode=tripcode)
                    logger.info('Fetched item {} from {} into thread {}'.format(
                        title, self.name, self.thread))

        logger.info('New timestamp is {}'.format(last_timestamp))
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])

    @staticmethod
    def _get_published_timestamp(item):
        """Returns the entry's publication time as an aware UTC datetime.

        Returns None when the entry has no 'published_parsed' value or the
        value cannot be converted.
        """
        published_parsed = getattr(item, 'published_parsed', None)
        if not published_parsed:
            return None
        try:
            # published_parsed is treated as UTC broken-down time, hence
            # timegm() rather than mktime().
            # NOTE(review): django.utils.timezone.utc is deprecated in newer
            # Django releases in favour of datetime.timezone.utc - confirm
            # against the Django version in use.
            return datetime.fromtimestamp(calendar.timegm(published_parsed), tz=utc)
        except (TypeError, ValueError, OverflowError, OSError):
            return None

    def parse_text(self, text):
        """Returns the text with HTML tags removed by strip_tags()."""
        return strip_tags(text)

    def strip_title(self, title, max_length):
        """Returns the title cut to at most max_length characters.

        A title longer than max_length is truncated and ends with an
        ellipsis character; a non-positive max_length yields an empty string.
        """
        if len(title) <= max_length:
            return title
        if max_length <= 0:
            # No room even for the ellipsis; slicing with a negative bound
            # here would return a string longer than the limit.
            return ''
        return title[:max_length - 1] + '…'