##// END OF EJS Templates
Properly parse feed timestamp
neko259 -
r1970:075e04c3 default
parent child Browse files
Show More
@@ -1,67 +1,68 b''
import calendar
import logging
from datetime import datetime
from time import mktime

import feedparser
from django.db import models, transaction
from django.utils import timezone
from django.utils.dateparse import parse_datetime
from django.utils.timezone import utc

from boards.models import Post
from boards.models.post import TITLE_MAX_LENGTH
13
14
14
15
# Maximum length of the ``source_type`` column.
SOURCE_TYPE_MAX_LENGTH = 100

# Supported source kinds; the stored value doubles as the display label.
SOURCE_TYPE_RSS = 'RSS'
TYPE_CHOICES = (
    (SOURCE_TYPE_RSS, SOURCE_TYPE_RSS),
)
20
21
21
22
class ThreadSource(models.Model):
    """An external source whose new items are posted into a thread.

    ``timestamp`` holds the publication time of the newest item that has
    already been processed; each fetch only creates posts for items
    published after it and then moves the timestamp forward.
    """

    class Meta:
        app_label = 'boards'

    name = models.TextField()
    thread = models.ForeignKey('Thread')
    timestamp = models.DateTimeField()
    source = models.TextField()
    source_type = models.CharField(max_length=SOURCE_TYPE_MAX_LENGTH,
                                   choices=TYPE_CHOICES)

    def __str__(self):
        return self.name

    @transaction.atomic
    def fetch_latest_posts(self):
        """Create posts for source items published since ``self.timestamp``.

        Nothing is fetched when the thread is archived. Otherwise, for an
        RSS source, the feed is parsed, entries are processed oldest
        first, a post is created for every entry newer than the stored
        timestamp, and the stored timestamp is advanced to the newest
        entry seen. Entries without a parsed publication date are logged
        and skipped. The whole operation runs in one transaction.
        """
        logger = logging.getLogger('boards.source')

        if self.thread.is_archived():
            logger.error('The thread {} is archived, please try another one'.format(self.thread))
            return

        start_timestamp = self.timestamp
        last_timestamp = start_timestamp
        logger.info('Start timestamp is {}'.format(start_timestamp))
        if self.thread.is_bumplimit():
            logger.warning('The thread {} has reached its bumplimit, please create a new one'.format(self.thread))

        if self.source_type == SOURCE_TYPE_RSS:
            feed = feedparser.parse(self.source)

            # Entries whose date could not be parsed have no usable
            # ``published_parsed``; drop them before sorting so that one
            # bad entry cannot abort the whole fetch.
            dated_entries = []
            for entry in feed.entries:
                if entry.get('published_parsed') is None:
                    logger.error('Invalid timestamp {} for {}'.format(
                        entry.get('published'),
                        entry.get('title', '')[:TITLE_MAX_LENGTH]))
                else:
                    dated_entries.append(entry)

            # Oldest first, so posts appear in publication order.
            dated_entries.sort(key=lambda entry: entry.published_parsed)

            for item in dated_entries:
                title = item.title[:TITLE_MAX_LENGTH]
                # ``published_parsed`` is a UTC struct_time, so it must be
                # converted with calendar.timegm(); time.mktime() would
                # interpret it as local time and shift the result.
                timestamp = datetime.fromtimestamp(
                    calendar.timegm(item.published_parsed), tz=utc)

                if timestamp > last_timestamp:
                    last_timestamp = timestamp
                if timestamp > start_timestamp:
                    Post.objects.create_post(title=title, text=item.description,
                                             thread=self.thread,
                                             file_urls=[item.link])
                    logger.info('Fetched item {} from {} into thread {}'.format(
                        title, self.name, self.thread))

        logger.info('New timestamp is {}'.format(last_timestamp))
        self.timestamp = last_timestamp
        self.save(update_fields=['timestamp'])
General Comments 0
You need to be logged in to leave comments. Login now