Show More
@@ -0,0 +1,24 | |||||
|
1 | # Generated by Django 2.0.8 on 2018-08-05 17:55 | |||
|
2 | ||||
|
3 | from django.db import migrations, models | |||
|
4 | ||||
|
5 | ||||
|
class Migration(migrations.Migration):
    """Create the table that backs the ``SyncSource`` model."""

    # Must be applied on top of this earlier ``boards`` migration.
    dependencies = [
        ('boards', '0070_auto_20171225_1149'),
    ]

    operations = [
        migrations.CreateModel(
            name='SyncSource',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('name', models.TextField()),
                (
                    'timestamp',
                    models.DateTimeField(blank=True, null=True),
                ),
                ('url', models.TextField()),
                ('tags', models.TextField(blank=True)),
                ('query_split_limit', models.IntegerField()),
            ],
        ),
    ]
@@ -6,7 +6,7 from boards.models import Post, Tag, Ban | |||||
6 | KeyPair, GlobalId, TagAlias, STATUS_ACTIVE |
|
6 | KeyPair, GlobalId, TagAlias, STATUS_ACTIVE | |
7 | from boards.models.attachment import FILE_TYPES_IMAGE, AttachmentSticker, \ |
|
7 | from boards.models.attachment import FILE_TYPES_IMAGE, AttachmentSticker, \ | |
8 | StickerPack |
|
8 | StickerPack | |
9 | from boards.models.source import ThreadSource |
|
9 | from boards.models.source import ThreadSource, SyncSource | |
10 |
|
10 | |||
11 |
|
11 | |||
12 | @admin.register(Post) |
|
12 | @admin.register(Post) | |
@@ -191,3 +191,7 class ThreadSourceAdmin(admin.ModelAdmin | |||||
191 | kwargs['queryset'] = Thread.objects.filter(status=STATUS_ACTIVE) |
|
191 | kwargs['queryset'] = Thread.objects.filter(status=STATUS_ACTIVE) | |
192 | return super().formfield_for_foreignkey(db_field, request, **kwargs) |
|
192 | return super().formfield_for_foreignkey(db_field, request, **kwargs) | |
193 |
|
193 | |||
|
194 | ||||
|
@admin.register(SyncSource)
class SyncSourceAdmin(admin.ModelAdmin):
    """Admin page for ``SyncSource`` records, searchable by name."""

    search_fields = ('name',)
@@ -1,140 +1,13 | |||||
1 | import re |
|
1 | from django.core.management import BaseCommand | |
2 | import logging |
|
|||
3 | import xml.etree.ElementTree as ET |
|
|||
4 |
|
2 | |||
5 | import httplib2 |
|
3 | from boards.models.source import SyncSource | |
6 | from django.core.management import BaseCommand |
|
|||
7 | from django.utils.dateparse import parse_datetime |
|
|||
8 |
|
||||
9 | from boards.models import GlobalId, KeyPair |
|
|||
10 | from boards.models.post.sync import SyncManager, TAG_ID, TAG_UPDATE_TIME |
|
|||
11 |
|
4 | |||
12 | __author__ = 'neko259' |
|
5 | __author__ = 'neko259' | |
13 |
|
6 | |||
14 |
|
7 | |||
15 | REGEX_GLOBAL_ID = re.compile(r'(\w+)::([\w\+/]+)::(\d+)') |
|
|||
16 |
|
||||
17 |
|
||||
18 | class Command(BaseCommand): |
|
8 | class Command(BaseCommand): | |
19 | help = 'Send a sync or get request to the server.' |
|
9 | help = 'Send a sync or get request to the servers.' | |
20 |
|
||||
21 | def add_arguments(self, parser): |
|
|||
22 | parser.add_argument('url', type=str, help='Server root url') |
|
|||
23 | parser.add_argument('--global-id', type=str, default='', |
|
|||
24 | help='Post global ID') |
|
|||
25 | parser.add_argument('--split-query', type=int, default=1, |
|
|||
26 | help='Split GET query into separate by the given' |
|
|||
27 | ' number of posts in one') |
|
|||
28 | parser.add_argument('--thread', type=int, |
|
|||
29 | help='Get posts of one specific thread') |
|
|||
30 | parser.add_argument('--tags', type=str, |
|
|||
31 | help='Get posts of the tags, comma-separated') |
|
|||
32 | parser.add_argument('--time-from', type=str, |
|
|||
33 | help='Get posts from the given timestamp') |
|
|||
34 |
|
10 | |||
35 | def handle(self, *args, **options): |
|
11 | def handle(self, *args, **options): | |
36 | logger = logging.getLogger('boards.sync') |
|
12 | for source in SyncSource.objects.all(): | |
37 |
|
13 | source.run_sync() | ||
38 | url = options.get('url') |
|
|||
39 |
|
||||
40 | list_url = url + 'api/sync/list/' |
|
|||
41 | get_url = url + 'api/sync/get/' |
|
|||
42 | file_url = url[:-1] |
|
|||
43 |
|
||||
44 | global_id_str = options.get('global_id') |
|
|||
45 | if global_id_str: |
|
|||
46 | match = REGEX_GLOBAL_ID.match(global_id_str) |
|
|||
47 | if match: |
|
|||
48 | key_type = match.group(1) |
|
|||
49 | key = match.group(2) |
|
|||
50 | local_id = match.group(3) |
|
|||
51 |
|
||||
52 | global_id = GlobalId(key_type=key_type, key=key, |
|
|||
53 | local_id=local_id) |
|
|||
54 |
|
||||
55 | xml = SyncManager.generate_request_get([global_id]) |
|
|||
56 | h = httplib2.Http() |
|
|||
57 | response, content = h.request(get_url, method="POST", body=xml) |
|
|||
58 |
|
||||
59 | SyncManager.parse_response_get(content, file_url) |
|
|||
60 | else: |
|
|||
61 | raise Exception('Invalid global ID') |
|
|||
62 | else: |
|
|||
63 | logger.info('Running LIST request...') |
|
|||
64 | h = httplib2.Http() |
|
|||
65 |
|
||||
66 | tags = [] |
|
|||
67 | tags_str = options.get('tags') |
|
|||
68 | if tags_str: |
|
|||
69 | tags = tags_str.split(',') |
|
|||
70 |
|
||||
71 | timestamp_str = options.get('time_from') |
|
|||
72 | timestamp = None |
|
|||
73 | if timestamp_str: |
|
|||
74 | timestamp = parse_datetime(timestamp_str) |
|
|||
75 | if not timestamp: |
|
|||
76 | raise Exception('Timestamp {} cannot be parsed'.format( |
|
|||
77 | timestamp_str)) |
|
|||
78 |
|
||||
79 | xml = SyncManager.generate_request_list( |
|
|||
80 | opening_post=options.get('thread'), tags=tags, |
|
|||
81 | timestamp_from=timestamp).encode() |
|
|||
82 | response, content = h.request(list_url, method="POST", body=xml) |
|
|||
83 | if response.status != 200: |
|
|||
84 | raise Exception('Server returned error {}'.format(response.status)) |
|
|||
85 |
|
||||
86 | logger.info('Processing response...') |
|
|||
87 |
|
||||
88 | root = ET.fromstring(content) |
|
|||
89 | status = root.findall('status')[0].text |
|
|||
90 | if status == 'success': |
|
|||
91 | ids_to_sync = list() |
|
|||
92 |
|
||||
93 | models = root.findall('models')[0] |
|
|||
94 | for model in models: |
|
|||
95 | self.add_to_sync_list(ids_to_sync, logger, model) |
|
|||
96 | logger.info('Starting sync...') |
|
|||
97 |
|
||||
98 | if len(ids_to_sync) > 0: |
|
|||
99 | limit = options.get('split_query', len(ids_to_sync)) |
|
|||
100 | for offset in range(0, len(ids_to_sync), limit): |
|
|||
101 | xml = SyncManager.generate_request_get(ids_to_sync[offset:offset + limit]) |
|
|||
102 | h = httplib2.Http() |
|
|||
103 | logger.info('Running GET request...') |
|
|||
104 | response, content = h.request(get_url, method="POST", body=xml) |
|
|||
105 | logger.info('Processing response...') |
|
|||
106 |
|
||||
107 | SyncManager.parse_response_get(content, file_url) |
|
|||
108 |
|
||||
109 | logger.info('Sync completed successfully') |
|
|||
110 | else: |
|
|||
111 | logger.info('Nothing to get, everything synced') |
|
|||
112 | else: |
|
|||
113 | raise Exception('Invalid response status') |
|
|||
114 |
|
||||
115 | def add_to_sync_list(self, ids_to_sync, logger, model): |
|
|||
116 | tag_id = model.find(TAG_ID) |
|
|||
117 | global_id, exists = GlobalId.from_xml_element(tag_id) |
|
|||
118 | from_this_board = self._is_from_this_board(global_id) |
|
|||
119 | if from_this_board: |
|
|||
120 | # If the post originates from this board, no need to process |
|
|||
121 | # it again, nobody else could modify it |
|
|||
122 | logger.debug('NO SYNC Processed post {}'.format(global_id)) |
|
|||
123 | else: |
|
|||
124 | tag_update_time = model.find(TAG_UPDATE_TIME) |
|
|||
125 | if tag_update_time: |
|
|||
126 | update_time = tag_update_time.text |
|
|||
127 | else: |
|
|||
128 | update_time = None |
|
|||
129 | if not exists or update_time is None or global_id.post.last_edit_time < parse_datetime( |
|
|||
130 | update_time): |
|
|||
131 | logger.debug('SYNC Processed post {}'.format(global_id)) |
|
|||
132 | ids_to_sync.append(global_id) |
|
|||
133 | else: |
|
|||
134 | logger.debug('NO SYNC Processed post {}'.format(global_id)) |
|
|||
135 |
|
||||
136 | def _is_from_this_board(self, global_id): |
|
|||
137 | from_this_board = KeyPair.objects.filter( |
|
|||
138 | key_type=global_id.key_type, |
|
|||
139 | public_key=global_id.key).exists() |
|
|||
140 | return from_this_board |
|
@@ -1,6 +1,8 | |||||
1 | import feedparser |
|
1 | import feedparser | |
|
2 | import httplib2 | |||
2 | import logging |
|
3 | import logging | |
3 | import calendar |
|
4 | import calendar | |
|
5 | import xml.etree.ElementTree as ET | |||
4 |
|
6 | |||
5 | from time import mktime |
|
7 | from time import mktime | |
6 | from datetime import datetime |
|
8 | from datetime import datetime | |
@@ -11,12 +13,14 from django.utils.timezone import utc | |||||
11 | from django.utils import timezone |
|
13 | from django.utils import timezone | |
12 | from django.utils.html import strip_tags |
|
14 | from django.utils.html import strip_tags | |
13 |
|
15 | |||
14 | from boards.models import Post |
|
16 | from boards.models import Post, GlobalId, KeyPair | |
15 | from boards.models.post import TITLE_MAX_LENGTH |
|
17 | from boards.models.post import TITLE_MAX_LENGTH | |
|
18 | from boards.models.post.sync import SyncManager, TAG_ID, TAG_UPDATE_TIME | |||
16 | from boards.settings import SECTION_EXTERNAL |
|
19 | from boards.settings import SECTION_EXTERNAL | |
17 | from boards.utils import get_tripcode_from_text |
|
20 | from boards.utils import get_tripcode_from_text | |
18 | from boards import settings |
|
21 | from boards import settings | |
19 |
|
22 | |||
|
23 | DELIMITER_TAGS = ',' | |||
20 |
|
24 | |||
21 | SOURCE_TYPE_MAX_LENGTH = 100 |
|
25 | SOURCE_TYPE_MAX_LENGTH = 100 | |
22 | SOURCE_TYPE_RSS = 'RSS' |
|
26 | SOURCE_TYPE_RSS = 'RSS' | |
@@ -83,3 +87,109 class ThreadSource(models.Model): | |||||
83 | result = title[:max_length - 1] + '…' |
|
87 | result = title[:max_length - 1] + '…' | |
84 | return result |
|
88 | return result | |
85 |
|
89 | |||
|
90 | ||||
|
class SyncSource(models.Model):
    """A remote server that posts are pulled from by ``run_sync``.

    Each record stores the server root URL, an optional tag filter, the
    maximum number of posts requested per GET call and the time the last
    successful sync was started.
    """

    class Meta:
        app_label = 'boards'

    # Display name; also used in the sync log messages.
    name = models.TextField()
    # Start time of the last successful sync; empty until the first one.
    timestamp = models.DateTimeField(blank=True, null=True)
    # Root URL of the remote server ('api/sync/...' is appended to it).
    url = models.TextField()
    # Tag names separated by DELIMITER_TAGS; blank means no tag filter is
    # passed to the LIST request.
    tags = models.TextField(blank=True)
    # Maximum number of posts per GET request; a non-positive value
    # requests everything in a single GET.
    query_split_limit = models.IntegerField()

    def __str__(self):
        return self.name

    @transaction.atomic
    def run_sync(self):
        """Pull new and updated posts from this source.

        Sends a LIST request limited by the stored tags and timestamp,
        picks the posts that need downloading (see ``add_to_sync_list``),
        fetches them with GET requests of at most ``query_split_limit``
        posts each and finally stores the new sync timestamp.

        Raises:
            Exception: if the server answers a request with a non-200
                HTTP status, or the LIST response status is not
                'success'.
        """
        logger = logging.getLogger('boards.sync')

        # Ignore whitespace around tag names and empty entries, so that
        # values like 'a, b' or a trailing delimiter still work.
        tags = [tag.strip() for tag in self.tags.split(DELIMITER_TAGS)
                if tag.strip()]

        # Taken before the LIST request, so anything changed on the remote
        # while this sync is running is requested again next time.
        new_timestamp = timezone.now()

        # Accept the root URL both with and without a trailing slash.
        base_url = self.url.rstrip('/')
        list_url = '{}/api/sync/list/'.format(base_url)
        get_url = '{}/api/sync/get/'.format(base_url)
        file_url = base_url

        xml = SyncManager.generate_request_list(
            tags=tags,
            timestamp_from=self.timestamp).encode()

        logger.info('Running LIST request for {}...'.format(self.name))
        http = httplib2.Http()
        response, content = http.request(list_url, method="POST", body=xml)
        if response.status != 200:
            raise Exception(
                'Server returned error {}'.format(response.status))

        logger.info('Processing response...')

        root = ET.fromstring(content)
        # findtext() returns None when the element is missing, which is
        # reported as an invalid status instead of an IndexError.
        if root.findtext('status') != 'success':
            raise Exception('Invalid response status')

        ids_to_sync = []
        model_list = root.find('models')
        if model_list is not None:
            for model in model_list:
                self.add_to_sync_list(ids_to_sync, logger, model)
        logger.info('Starting sync...')

        if ids_to_sync:
            limit = len(ids_to_sync)
            if self.query_split_limit > 0:
                limit = min(self.query_split_limit, limit)

            for offset in range(0, len(ids_to_sync), limit):
                xml = SyncManager.generate_request_get(
                    ids_to_sync[offset:offset + limit])
                logger.info('Running GET request...')
                response, content = http.request(get_url, method="POST",
                                                 body=xml)
                # Same check as for the LIST request: do not try to parse
                # an error page as a sync response.
                if response.status != 200:
                    raise Exception(
                        'Server returned error {}'.format(response.status))
                logger.info('Processing response...')

                SyncManager.parse_response_get(content, file_url)

            logger.info(
                'Sync completed successfully for {}'.format(self.name))
        else:
            logger.info(
                'Nothing to get for {}, everything synced'.format(self.name))

        self.timestamp = new_timestamp
        self.save(update_fields=['timestamp'])

    def add_to_sync_list(self, ids_to_sync, logger, model):
        """Append the post described by ``model`` to ``ids_to_sync`` if it
        has to be downloaded.

        A post is skipped when its key belongs to this board, or when it
        already exists locally and the remote update time is not newer
        than the local last edit time.

        Args:
            ids_to_sync: list that collects the global IDs to request.
            logger: logger that receives the per-post debug messages.
            model: XML element of one post from the LIST response.
        """
        global_id, exists = GlobalId.from_xml_element(model.find(TAG_ID))

        if self._is_from_this_board(global_id):
            # If the post originates from this board, no need to process
            # it again, nobody else could modify it
            logger.debug('NO SYNC Processed post {}'.format(global_id))
            return

        tag_update_time = model.find(TAG_UPDATE_TIME)
        # Compare with None explicitly: an Element without child elements
        # is falsy, so a plain truth test would always discard the time.
        update_time = None
        if tag_update_time is not None and tag_update_time.text:
            # parse_datetime() returns None for badly formatted input; the
            # post is then synced as if no update time had been sent.
            update_time = parse_datetime(tag_update_time.text)

        if (not exists or update_time is None
                or global_id.post.last_edit_time < update_time):
            logger.debug('SYNC Processed post {}'.format(global_id))
            ids_to_sync.append(global_id)
        else:
            logger.debug('NO SYNC Processed post {}'.format(global_id))

    def _is_from_this_board(self, global_id):
        """Return True if a local KeyPair matches the key of ``global_id``."""
        return KeyPair.objects.filter(
            key_type=global_id.key_type,
            public_key=global_id.key).exists()
195 | return from_this_board |
General Comments 0
You need to be logged in to leave comments.
Login now