# HG changeset patch # User neko259 # Date 2018-08-05 18:08:22 # Node ID be532800cb5572daa70645ff547c56088ea50b91 # Parent edce8526e66073d12974dec41c6a4666a4676ba4 Administration for sync sources. Now sync is available from the admin site instead of manual command line arguments. Still need to invoke sync_with_server by cron/timer diff --git a/boards/admin.py b/boards/admin.py --- a/boards/admin.py +++ b/boards/admin.py @@ -6,7 +6,7 @@ from boards.models import Post, Tag, Ban KeyPair, GlobalId, TagAlias, STATUS_ACTIVE from boards.models.attachment import FILE_TYPES_IMAGE, AttachmentSticker, \ StickerPack -from boards.models.source import ThreadSource +from boards.models.source import ThreadSource, SyncSource @admin.register(Post) @@ -191,3 +191,7 @@ class ThreadSourceAdmin(admin.ModelAdmin kwargs['queryset'] = Thread.objects.filter(status=STATUS_ACTIVE) return super().formfield_for_foreignkey(db_field, request, **kwargs) + +@admin.register(SyncSource) +class SyncSourceAdmin(admin.ModelAdmin): + search_fields = ('name',) \ No newline at end of file diff --git a/boards/management/commands/sync_with_server.py b/boards/management/commands/sync_with_server.py --- a/boards/management/commands/sync_with_server.py +++ b/boards/management/commands/sync_with_server.py @@ -1,140 +1,13 @@ -import re -import logging -import xml.etree.ElementTree as ET +from django.core.management import BaseCommand -import httplib2 -from django.core.management import BaseCommand -from django.utils.dateparse import parse_datetime - -from boards.models import GlobalId, KeyPair -from boards.models.post.sync import SyncManager, TAG_ID, TAG_UPDATE_TIME +from boards.models.source import SyncSource __author__ = 'neko259' -REGEX_GLOBAL_ID = re.compile(r'(\w+)::([\w\+/]+)::(\d+)') - - class Command(BaseCommand): - help = 'Send a sync or get request to the server.' 
- - def add_arguments(self, parser): - parser.add_argument('url', type=str, help='Server root url') - parser.add_argument('--global-id', type=str, default='', - help='Post global ID') - parser.add_argument('--split-query', type=int, default=1, - help='Split GET query into separate by the given' - ' number of posts in one') - parser.add_argument('--thread', type=int, - help='Get posts of one specific thread') - parser.add_argument('--tags', type=str, - help='Get posts of the tags, comma-separated') - parser.add_argument('--time-from', type=str, - help='Get posts from the given timestamp') + help = 'Send a sync or get request to the servers.' def handle(self, *args, **options): - logger = logging.getLogger('boards.sync') - - url = options.get('url') - - list_url = url + 'api/sync/list/' - get_url = url + 'api/sync/get/' - file_url = url[:-1] - - global_id_str = options.get('global_id') - if global_id_str: - match = REGEX_GLOBAL_ID.match(global_id_str) - if match: - key_type = match.group(1) - key = match.group(2) - local_id = match.group(3) - - global_id = GlobalId(key_type=key_type, key=key, - local_id=local_id) - - xml = SyncManager.generate_request_get([global_id]) - h = httplib2.Http() - response, content = h.request(get_url, method="POST", body=xml) - - SyncManager.parse_response_get(content, file_url) - else: - raise Exception('Invalid global ID') - else: - logger.info('Running LIST request...') - h = httplib2.Http() - - tags = [] - tags_str = options.get('tags') - if tags_str: - tags = tags_str.split(',') - - timestamp_str = options.get('time_from') - timestamp = None - if timestamp_str: - timestamp = parse_datetime(timestamp_str) - if not timestamp: - raise Exception('Timestamp {} cannot be parsed'.format( - timestamp_str)) - - xml = SyncManager.generate_request_list( - opening_post=options.get('thread'), tags=tags, - timestamp_from=timestamp).encode() - response, content = h.request(list_url, method="POST", body=xml) - if response.status != 200: - raise 
Exception('Server returned error {}'.format(response.status)) - - logger.info('Processing response...') - - root = ET.fromstring(content) - status = root.findall('status')[0].text - if status == 'success': - ids_to_sync = list() - - models = root.findall('models')[0] - for model in models: - self.add_to_sync_list(ids_to_sync, logger, model) - logger.info('Starting sync...') - - if len(ids_to_sync) > 0: - limit = options.get('split_query', len(ids_to_sync)) - for offset in range(0, len(ids_to_sync), limit): - xml = SyncManager.generate_request_get(ids_to_sync[offset:offset + limit]) - h = httplib2.Http() - logger.info('Running GET request...') - response, content = h.request(get_url, method="POST", body=xml) - logger.info('Processing response...') - - SyncManager.parse_response_get(content, file_url) - - logger.info('Sync completed successfully') - else: - logger.info('Nothing to get, everything synced') - else: - raise Exception('Invalid response status') - - def add_to_sync_list(self, ids_to_sync, logger, model): - tag_id = model.find(TAG_ID) - global_id, exists = GlobalId.from_xml_element(tag_id) - from_this_board = self._is_from_this_board(global_id) - if from_this_board: - # If the post originates from this board, no need to process - # it again, nobody else could modify it - logger.debug('NO SYNC Processed post {}'.format(global_id)) - else: - tag_update_time = model.find(TAG_UPDATE_TIME) - if tag_update_time: - update_time = tag_update_time.text - else: - update_time = None - if not exists or update_time is None or global_id.post.last_edit_time < parse_datetime( - update_time): - logger.debug('SYNC Processed post {}'.format(global_id)) - ids_to_sync.append(global_id) - else: - logger.debug('NO SYNC Processed post {}'.format(global_id)) - - def _is_from_this_board(self, global_id): - from_this_board = KeyPair.objects.filter( - key_type=global_id.key_type, - public_key=global_id.key).exists() - return from_this_board + for source in SyncSource.objects.all(): + 
source.run_sync() diff --git a/boards/migrations/0071_syncsource.py b/boards/migrations/0071_syncsource.py new file mode 100644 --- /dev/null +++ b/boards/migrations/0071_syncsource.py @@ -0,0 +1,24 @@ +# Generated by Django 2.0.8 on 2018-08-05 17:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('boards', '0070_auto_20171225_1149'), + ] + + operations = [ + migrations.CreateModel( + name='SyncSource', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.TextField()), + ('timestamp', models.DateTimeField(blank=True, null=True)), + ('url', models.TextField()), + ('tags', models.TextField(blank=True)), + ('query_split_limit', models.IntegerField()), + ], + ), + ] diff --git a/boards/models/source.py b/boards/models/source.py --- a/boards/models/source.py +++ b/boards/models/source.py @@ -1,6 +1,8 @@ import feedparser +import httplib2 import logging import calendar +import xml.etree.ElementTree as ET from time import mktime from datetime import datetime @@ -11,12 +13,14 @@ from django.utils.timezone import utc from django.utils import timezone from django.utils.html import strip_tags -from boards.models import Post +from boards.models import Post, GlobalId, KeyPair from boards.models.post import TITLE_MAX_LENGTH +from boards.models.post.sync import SyncManager, TAG_ID, TAG_UPDATE_TIME from boards.settings import SECTION_EXTERNAL from boards.utils import get_tripcode_from_text from boards import settings +DELIMITER_TAGS = ',' SOURCE_TYPE_MAX_LENGTH = 100 SOURCE_TYPE_RSS = 'RSS' @@ -83,3 +87,109 @@ class ThreadSource(models.Model): result = title[:max_length - 1] + '…' return result + +class SyncSource(models.Model): + class Meta: + app_label = 'boards' + + name = models.TextField() + timestamp = models.DateTimeField(blank=True, null=True) + url = models.TextField() + tags = models.TextField(blank=True) + query_split_limit = 
models.IntegerField() + + def __str__(self): + return self.name + + @transaction.atomic + def run_sync(self): + logger = logging.getLogger('boards.sync') + + tags = [] + if self.tags: + tags = self.tags.split(DELIMITER_TAGS) + + timestamp = None + if self.timestamp: + timestamp = self.timestamp + + new_timestamp = timezone.now() + + list_url = '{}api/sync/list/'.format(self.url) + get_url = '{}api/sync/get/'.format(self.url) + file_url = self.url[:-1] + + xml = SyncManager.generate_request_list( + tags=tags, + timestamp_from=timestamp).encode() + + logger.info('Running LIST request for {}...'.format(self.name)) + h = httplib2.Http() + response, content = h.request(list_url, method="POST", body=xml) + if response.status != 200: + raise Exception('Server returned error {}'.format(response.status)) + + logger.info('Processing response...') + + root = ET.fromstring(content) + status = root.findall('status')[0].text + if status == 'success': + ids_to_sync = list() + + models = root.findall('models')[0] + for model in models: + self.add_to_sync_list(ids_to_sync, logger, model) + logger.info('Starting sync...') + + if len(ids_to_sync) > 0: + if self.query_split_limit > 0: + limit = min(self.query_split_limit, len(ids_to_sync)) + else: + limit = len(ids_to_sync) + + for offset in range(0, len(ids_to_sync), limit): + xml = SyncManager.generate_request_get( + ids_to_sync[offset:offset + limit]) + h = httplib2.Http() + logger.info('Running GET request...') + response, content = h.request(get_url, method="POST", + body=xml) + logger.info('Processing response...') + + SyncManager.parse_response_get(content, file_url) + + logger.info('Sync completed successfully for {}'.format(self.name)) + else: + logger.info('Nothing to get for {}, everything synced'.format(self.name)) + else: + raise Exception('Invalid response status') + + self.timestamp = new_timestamp + self.save(update_fields=['timestamp']) + + def add_to_sync_list(self, ids_to_sync, logger, model): + tag_id = 
model.find(TAG_ID) + global_id, exists = GlobalId.from_xml_element(tag_id) + from_this_board = self._is_from_this_board(global_id) + if from_this_board: + # If the post originates from this board, no need to process + # it again, nobody else could modify it + logger.debug('NO SYNC Processed post {}'.format(global_id)) + else: + tag_update_time = model.find(TAG_UPDATE_TIME) + if tag_update_time is not None:  # an Element with no children is falsy, so test presence explicitly + update_time = tag_update_time.text + else: + update_time = None + if not exists or update_time is None or global_id.post.last_edit_time < parse_datetime( + update_time): + logger.debug('SYNC Processed post {}'.format(global_id)) + ids_to_sync.append(global_id) + else: + logger.debug('NO SYNC Processed post {}'.format(global_id)) + + def _is_from_this_board(self, global_id): + from_this_board = KeyPair.objects.filter( + key_type=global_id.key_type, + public_key=global_id.key).exists() + return from_this_board