##// END OF EJS Templates
Added administration for sync sources. Sync can now be set up from the admin site instead of through manual command-line arguments. sync_with_server still needs to be invoked by cron or a timer.
neko259 -
r2123:be532800 default
parent child Browse files
Show More
@@ -0,0 +1,24
1 # Generated by Django 2.0.8 on 2018-08-05 17:55
2
3 from django.db import migrations, models
4
5
class Migration(migrations.Migration):
    """Create the table backing the ``SyncSource`` model."""

    # Must be applied on top of the previous 'boards' migration.
    dependencies = [
        ('boards', '0070_auto_20171225_1149'),
    ]

    operations = [
        migrations.CreateModel(
            name='SyncSource',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('name', models.TextField()),
                # Nullable: a source may have no timestamp stored yet.
                ('timestamp', models.DateTimeField(blank=True, null=True)),
                ('url', models.TextField()),
                ('tags', models.TextField(blank=True)),
                ('query_split_limit', models.IntegerField()),
            ],
        ),
    ]
@@ -6,7 +6,7 from boards.models import Post, Tag, Ban
6 KeyPair, GlobalId, TagAlias, STATUS_ACTIVE
6 KeyPair, GlobalId, TagAlias, STATUS_ACTIVE
7 from boards.models.attachment import FILE_TYPES_IMAGE, AttachmentSticker, \
7 from boards.models.attachment import FILE_TYPES_IMAGE, AttachmentSticker, \
8 StickerPack
8 StickerPack
9 from boards.models.source import ThreadSource
9 from boards.models.source import ThreadSource, SyncSource
10
10
11
11
12 @admin.register(Post)
12 @admin.register(Post)
@@ -191,3 +191,7 class ThreadSourceAdmin(admin.ModelAdmin
191 kwargs['queryset'] = Thread.objects.filter(status=STATUS_ACTIVE)
191 kwargs['queryset'] = Thread.objects.filter(status=STATUS_ACTIVE)
192 return super().formfield_for_foreignkey(db_field, request, **kwargs)
192 return super().formfield_for_foreignkey(db_field, request, **kwargs)
193
193
194
@admin.register(SyncSource)
class SyncSourceAdmin(admin.ModelAdmin):
    """Admin page for sync sources, searchable by source name."""

    search_fields = ('name',)
@@ -1,140 +1,13
1 import re
1 from django.core.management import BaseCommand
2 import logging
3 import xml.etree.ElementTree as ET
4
2
5 import httplib2
3 from boards.models.source import SyncSource
6 from django.core.management import BaseCommand
7 from django.utils.dateparse import parse_datetime
8
9 from boards.models import GlobalId, KeyPair
10 from boards.models.post.sync import SyncManager, TAG_ID, TAG_UPDATE_TIME
11
4
12 __author__ = 'neko259'
5 __author__ = 'neko259'
13
6
14
7
15 REGEX_GLOBAL_ID = re.compile(r'(\w+)::([\w\+/]+)::(\d+)')
16
17
class Command(BaseCommand):
    """Management command that runs a sync for every stored SyncSource.

    The sources are read from the database, so the command defines no
    arguments of its own.
    """

    help = 'Send a sync or get request to the servers.'

    def handle(self, *args, **options):
        """Call ``run_sync()`` on each SyncSource, one after another.

        A failure of one source is logged and does not prevent the
        remaining sources from being processed.

        Raises:
            CommandError: after all sources were tried, if at least one of
                them failed, so that the caller still sees a failure.
        """
        # Imported here so that the block does not depend on changes to the
        # module-level import list.
        import logging

        from django.core.management.base import CommandError

        logger = logging.getLogger('boards.sync')

        failed = []
        for source in SyncSource.objects.all():
            try:
                source.run_sync()
            except Exception:
                # run_sync() raises plain Exception, so nothing narrower
                # can be caught here; the traceback is kept in the log.
                logger.exception('Sync failed for {}'.format(source))
                failed.append(str(source))

        if failed:
            raise CommandError(
                'Sync failed for: {}'.format(', '.join(failed)))
@@ -1,6 +1,8
1 import feedparser
1 import feedparser
2 import httplib2
2 import logging
3 import logging
3 import calendar
4 import calendar
5 import xml.etree.ElementTree as ET
4
6
5 from time import mktime
7 from time import mktime
6 from datetime import datetime
8 from datetime import datetime
@@ -11,12 +13,14 from django.utils.timezone import utc
11 from django.utils import timezone
13 from django.utils import timezone
12 from django.utils.html import strip_tags
14 from django.utils.html import strip_tags
13
15
14 from boards.models import Post
16 from boards.models import Post, GlobalId, KeyPair
15 from boards.models.post import TITLE_MAX_LENGTH
17 from boards.models.post import TITLE_MAX_LENGTH
18 from boards.models.post.sync import SyncManager, TAG_ID, TAG_UPDATE_TIME
16 from boards.settings import SECTION_EXTERNAL
19 from boards.settings import SECTION_EXTERNAL
17 from boards.utils import get_tripcode_from_text
20 from boards.utils import get_tripcode_from_text
18 from boards import settings
21 from boards import settings
19
22
23 DELIMITER_TAGS = ','
20
24
21 SOURCE_TYPE_MAX_LENGTH = 100
25 SOURCE_TYPE_MAX_LENGTH = 100
22 SOURCE_TYPE_RSS = 'RSS'
26 SOURCE_TYPE_RSS = 'RSS'
@@ -83,3 +87,109 class ThreadSource(models.Model):
83 result = title[:max_length - 1] + '…'
87 result = title[:max_length - 1] + '…'
84 return result
88 return result
85
89
90
class SyncSource(models.Model):
    """A remote server that posts are pulled from.

    Fields:
        name: label used in log messages and as the string form.
        timestamp: sent as ``timestamp_from`` in the LIST request and
            replaced after every run that finishes without an exception.
        url: server root URL; a trailing slash is optional.
        tags: comma-separated tag names sent in the LIST request; a blank
            value sends an empty tag list.
        query_split_limit: maximum number of posts requested in one GET
            request; zero or a negative value requests all posts at once.
    """

    class Meta:
        app_label = 'boards'

    name = models.TextField()
    timestamp = models.DateTimeField(blank=True, null=True)
    url = models.TextField()
    tags = models.TextField(blank=True)
    query_split_limit = models.IntegerField()

    def __str__(self):
        return self.name

    @transaction.atomic
    def run_sync(self):
        """Fetch the list of posts from the server and load the needed ones.

        Sends a LIST request, selects the posts that have to be fetched
        (see ``add_to_sync_list``), loads them with one or more GET requests
        and finally stores the new ``timestamp``.

        Raises:
            Exception: if the server answers with a non-200 HTTP status or
                the LIST response status is not ``success``.
        """
        logger = logging.getLogger('boards.sync')

        # Surrounding whitespace and empty items ("a, b," etc.) are dropped.
        tags = [tag.strip()
                for tag in (self.tags or '').split(DELIMITER_TAGS)
                if tag.strip()]

        # Taken before the LIST request so that anything changed on the
        # server while the sync is running is requested again next time.
        new_timestamp = timezone.now()

        # Accept the root URL both with and without a trailing slash.
        base_url = self.url.rstrip('/')
        list_url = '{}/api/sync/list/'.format(base_url)
        get_url = '{}/api/sync/get/'.format(base_url)
        file_url = base_url

        xml = SyncManager.generate_request_list(
            tags=tags,
            timestamp_from=self.timestamp).encode()

        http = httplib2.Http()

        logger.info('Running LIST request for {}...'.format(self.name))
        content = self._post_xml(http, list_url, xml)

        logger.info('Processing response...')

        root = ET.fromstring(content)
        # findtext() returns None for a missing element, which is reported
        # as an invalid status instead of an IndexError.
        if root.findtext('status') != 'success':
            raise Exception('Invalid response status')

        ids_to_sync = []
        model_list = root.find('models')
        if model_list is not None:
            for model in model_list:
                self.add_to_sync_list(ids_to_sync, logger, model)
        logger.info('Starting sync...')

        if ids_to_sync:
            limit = len(ids_to_sync)
            if self.query_split_limit > 0:
                limit = min(self.query_split_limit, limit)

            for offset in range(0, len(ids_to_sync), limit):
                xml = SyncManager.generate_request_get(
                    ids_to_sync[offset:offset + limit])
                logger.info('Running GET request...')
                content = self._post_xml(http, get_url, xml)
                logger.info('Processing response...')

                SyncManager.parse_response_get(content, file_url)

            logger.info('Sync completed successfully for {}'.format(
                self.name))
        else:
            logger.info('Nothing to get for {}, everything synced'.format(
                self.name))

        self.timestamp = new_timestamp
        self.save(update_fields=['timestamp'])

    @staticmethod
    def _post_xml(http, url, body):
        """POST ``body`` to ``url`` and return the response content.

        Raises:
            Exception: if the HTTP status of the response is not 200.
        """
        response, content = http.request(url, method="POST", body=body)
        if response.status != 200:
            raise Exception('Server returned error {}'.format(
                response.status))
        return content

    def add_to_sync_list(self, ids_to_sync, logger, model):
        """Append the global ID of ``model`` to ``ids_to_sync`` if needed.

        A post is skipped when its key belongs to this board. Otherwise it
        is added when it is not known locally, when the response carries no
        usable update time, or when the local post was last edited before
        the update time given in the response.

        Args:
            ids_to_sync: list that collects the global IDs to request.
            logger: logger that receives the per-post debug messages.
            model: XML element describing one post of the LIST response.
        """
        # Local import: the name is not among the module imports visible
        # next to this class.
        from django.utils.dateparse import parse_datetime

        tag_id = model.find(TAG_ID)
        global_id, exists = GlobalId.from_xml_element(tag_id)
        if self._is_from_this_board(global_id):
            # If the post originates from this board, no need to process
            # it again, nobody else could modify it
            logger.debug('NO SYNC Processed post {}'.format(global_id))
            return

        # An Element without children is falsy, so the presence of the tag
        # has to be tested with "is not None".
        tag_update_time = model.find(TAG_UPDATE_TIME)
        update_time = None
        if tag_update_time is not None and tag_update_time.text:
            # parse_datetime() returns None for text it cannot match.
            update_time = parse_datetime(tag_update_time.text)

        if (not exists or update_time is None
                or global_id.post.last_edit_time < update_time):
            logger.debug('SYNC Processed post {}'.format(global_id))
            ids_to_sync.append(global_id)
        else:
            logger.debug('NO SYNC Processed post {}'.format(global_id))

    def _is_from_this_board(self, global_id):
        """Return True if a local KeyPair matches the key of ``global_id``."""
        return KeyPair.objects.filter(
            key_type=global_id.key_type,
            public_key=global_id.key).exists()
General Comments 0
You need to be logged in to leave comments. Login now