##// END OF EJS Templates
Speed up post hiding, do not load the hidden posts list for each post being processed
Speed up post hiding, do not load the hidden posts list for each post being processed

File last commit:

r1928:f3702378 default
r2082:47f758c2 default
Show More
sync.py
389 lines | 14.3 KiB | text/x-python | PythonLexer
import logging
import xml.etree.ElementTree as et
from django.db import transaction
from django.utils.dateparse import parse_datetime
from boards.abstracts.exceptions import SyncException
from boards.abstracts.sync_filters import ThreadFilter, TagsFilter, \
TimestampFromFilter
from boards.models import KeyPair, GlobalId, Signature, Post, Tag
from boards.models.attachment.downloaders import download
from boards.models.signature import TAG_REQUEST, ATTR_TYPE, TYPE_GET, \
ATTR_VERSION, TAG_MODEL, ATTR_NAME, TAG_ID, TYPE_LIST
from boards.utils import get_file_mimetype, get_file_hash
EXCEPTION_NODE = 'Sync node returned an error: {}.'
EXCEPTION_DOWNLOAD = 'File was not downloaded.'
EXCEPTION_HASH = 'File hash does not match attachment hash.'
EXCEPTION_SIGNATURE = 'Invalid model signature for {}.'
EXCEPTION_AUTHOR_SIGNATURE = 'Model {} has no author signature.'
EXCEPTION_THREAD = 'No thread exists for post {}'
ENCODING_UNICODE = 'unicode'
TAG_MODEL = 'model'
TAG_REQUEST = 'request'
TAG_RESPONSE = 'response'
TAG_ID = 'id'
TAG_STATUS = 'status'
TAG_MODELS = 'models'
TAG_TITLE = 'title'
TAG_TEXT = 'text'
TAG_THREAD = 'thread'
TAG_PUB_TIME = 'pub-time'
TAG_UPDATE_TIME = 'update-time'
TAG_SIGNATURES = 'signatures'
TAG_SIGNATURE = 'signature'
TAG_CONTENT = 'content'
TAG_ATTACHMENTS = 'attachments'
TAG_ATTACHMENT = 'attachment'
TAG_TAGS = 'tags'
TAG_TAG = 'tag'
TAG_ATTACHMENT_REFS = 'attachment-refs'
TAG_ATTACHMENT_REF = 'attachment-ref'
TAG_TRIPCODE = 'tripcode'
TAG_VERSION = 'version'
TYPE_GET = 'get'
ATTR_VERSION = 'version'
ATTR_TYPE = 'type'
ATTR_NAME = 'name'
ATTR_VALUE = 'value'
ATTR_MIMETYPE = 'mimetype'
ATTR_KEY = 'key'
ATTR_REF = 'ref'
ATTR_URL = 'url'
ATTR_ID_TYPE = 'id-type'
ID_TYPE_MD5 = 'md5'
ID_TYPE_URL = 'url'
STATUS_SUCCESS = 'success'
CURRENT_MODEL_VERSION = '1.1'
logger = logging.getLogger('boards.sync')
class SyncManager:
@staticmethod
def generate_response_get(model_list: list):
response = et.Element(TAG_RESPONSE)
status = et.SubElement(response, TAG_STATUS)
status.text = STATUS_SUCCESS
models = et.SubElement(response, TAG_MODELS)
for post in model_list:
model = et.SubElement(models, TAG_MODEL)
model.set(ATTR_NAME, 'post')
global_id = post.global_id
attachments = post.attachments.all()
if global_id.content:
model.append(et.fromstring(global_id.content))
if len(attachments) > 0:
internal_attachments = False
for attachment in attachments:
if attachment.is_internal():
internal_attachments = True
break
if internal_attachments:
attachment_refs = et.SubElement(model, TAG_ATTACHMENT_REFS)
for file in attachments:
SyncManager._attachment_to_xml(
None, attachment_refs, file)
else:
content_tag = et.SubElement(model, TAG_CONTENT)
tag_id = et.SubElement(content_tag, TAG_ID)
global_id.to_xml_element(tag_id)
title = et.SubElement(content_tag, TAG_TITLE)
title.text = post.title
text = et.SubElement(content_tag, TAG_TEXT)
text.text = post.get_sync_text()
thread = post.get_thread()
if post.is_opening():
tag_tags = et.SubElement(content_tag, TAG_TAGS)
for tag in thread.get_tags():
tag_tag = et.SubElement(tag_tags, TAG_TAG)
tag_tag.text = tag.get_name()
else:
tag_thread = et.SubElement(content_tag, TAG_THREAD)
thread_id = et.SubElement(tag_thread, TAG_ID)
thread.get_opening_post().global_id.to_xml_element(thread_id)
pub_time = et.SubElement(content_tag, TAG_PUB_TIME)
pub_time.text = str(post.get_pub_time_str())
update_time = et.SubElement(content_tag, TAG_UPDATE_TIME)
update_time.text = str(post.last_edit_time)
if post.tripcode:
tripcode = et.SubElement(content_tag, TAG_TRIPCODE)
tripcode.text = post.tripcode
if len(attachments) > 0:
attachments_tag = et.SubElement(content_tag, TAG_ATTACHMENTS)
internal_attachments = False
for attachment in attachments:
if attachment.is_internal():
internal_attachments = True
break
if internal_attachments:
attachment_refs = et.SubElement(model, TAG_ATTACHMENT_REFS)
else:
attachment_refs = None
for file in attachments:
SyncManager._attachment_to_xml(
attachments_tag, attachment_refs, file)
global_id.content = et.tostring(content_tag, ENCODING_UNICODE)
global_id.save()
signatures_tag = et.SubElement(model, TAG_SIGNATURES)
post_signatures = global_id.signature_set.all()
if post_signatures:
signatures = post_signatures
else:
key = KeyPair.objects.get(public_key=global_id.key)
signature = Signature(
key_type=key.key_type,
key=key.public_key,
signature=key.sign(global_id.content),
global_id=global_id,
)
signature.save()
signatures = [signature]
for signature in signatures:
signature_tag = et.SubElement(signatures_tag, TAG_SIGNATURE)
signature_tag.set(ATTR_TYPE, signature.key_type)
signature_tag.set(ATTR_VALUE, signature.signature)
signature_tag.set(ATTR_KEY, signature.key)
return et.tostring(response, ENCODING_UNICODE)
@staticmethod
def parse_response_get(response_xml, hostname):
tag_root = et.fromstring(response_xml)
tag_status = tag_root.find(TAG_STATUS)
if STATUS_SUCCESS == tag_status.text:
tag_models = tag_root.find(TAG_MODELS)
for tag_model in tag_models:
SyncManager.parse_post(tag_model, hostname)
else:
raise SyncException(EXCEPTION_NODE.format(tag_status.text))
@staticmethod
@transaction.atomic
def parse_post(tag_model, hostname):
tag_content = tag_model.find(TAG_CONTENT)
content_str = et.tostring(tag_content, ENCODING_UNICODE)
tag_id = tag_content.find(TAG_ID)
global_id, exists = GlobalId.from_xml_element(tag_id)
signatures = SyncManager._verify_model(global_id, content_str, tag_model)
update_time = tag_content.find(TAG_UPDATE_TIME).text
is_old = exists and global_id.post.last_edit_time < parse_datetime(update_time)
if exists and not is_old:
logger.debug('Post {} exists and is up to date.'.format(global_id))
else:
global_id.content = content_str
global_id.save()
for signature in signatures:
signature.global_id = global_id
signature.save()
title = tag_content.find(TAG_TITLE).text or ''
text = tag_content.find(TAG_TEXT).text or ''
tripcode_tag = tag_content.find(TAG_TRIPCODE)
if tripcode_tag is not None:
tripcode = tripcode_tag.text or ''
else:
tripcode = ''
pub_time = tag_content.find(TAG_PUB_TIME).text
thread = tag_content.find(TAG_THREAD)
tags = []
if thread:
thread_id = thread.find(TAG_ID)
op_global_id, exists = GlobalId.from_xml_element(thread_id)
if exists:
opening_post = Post.objects.get(global_id=op_global_id)
else:
raise Exception(EXCEPTION_THREAD.format(global_id))
else:
opening_post = None
tag_tags = tag_content.find(TAG_TAGS)
for tag_tag in tag_tags:
tag, created = Tag.objects.get_or_create(
aliases__name=tag_tag.text)
tags.append(tag)
# TODO Check that the replied posts are already present
# before adding new ones
files = []
urls = []
tag_attachments = tag_content.find(TAG_ATTACHMENTS) or list()
tag_refs = tag_model.find(TAG_ATTACHMENT_REFS)
for attachment in tag_attachments:
if attachment.get(ATTR_ID_TYPE) == ID_TYPE_URL:
urls.append(attachment.text)
else:
tag_ref = tag_refs.find("{}[@ref='{}']".format(
TAG_ATTACHMENT_REF, attachment.text))
url = tag_ref.get(ATTR_URL)
attached_file = download(hostname + url, validate=False)
if attached_file is None:
raise SyncException(EXCEPTION_DOWNLOAD)
hash = get_file_hash(attached_file)
if hash != attachment.text:
raise SyncException(EXCEPTION_HASH)
files.append(attached_file)
if is_old:
post = global_id.post
Post.objects.update_post(
post, title=title, text=text, pub_time=pub_time,
tags=tags, files=files, file_urls=urls,
tripcode=tripcode, version=version, last_edit_time=update_time)
logger.debug('Parsed updated post {}'.format(global_id))
else:
Post.objects.import_post(
title=title, text=text, pub_time=pub_time,
opening_post=opening_post, tags=tags,
global_id=global_id, files=files,
file_urls=urls, tripcode=tripcode, last_edit_time=update_time)
logger.debug('Parsed new post {}'.format(global_id))
@staticmethod
def generate_response_list(filters):
response = et.Element(TAG_RESPONSE)
status = et.SubElement(response, TAG_STATUS)
status.text = STATUS_SUCCESS
models = et.SubElement(response, TAG_MODELS)
posts = Post.objects.prefetch_related('global_id')
for post_filter in filters:
posts = post_filter.filter(posts)
for post in posts:
tag_model = et.SubElement(models, TAG_MODEL)
tag_id = et.SubElement(tag_model, TAG_ID)
post.global_id.to_xml_element(tag_id)
update_time = et.SubElement(tag_model, TAG_UPDATE_TIME)
update_time.text = str(post.last_edit_time)
return et.tostring(response, ENCODING_UNICODE)
@staticmethod
def _verify_model(global_id, content_str, tag_model):
"""
Verifies all signatures for a single model.
"""
signatures = []
tag_signatures = tag_model.find(TAG_SIGNATURES)
has_author_signature = False
for tag_signature in tag_signatures:
signature_type = tag_signature.get(ATTR_TYPE)
signature_value = tag_signature.get(ATTR_VALUE)
signature_key = tag_signature.get(ATTR_KEY)
if global_id.key_type == signature_type and\
global_id.key == signature_key:
has_author_signature = True
signature = Signature(key_type=signature_type,
key=signature_key,
signature=signature_value)
if not KeyPair.objects.verify(signature, content_str):
raise SyncException(EXCEPTION_SIGNATURE.format(content_str))
signatures.append(signature)
if not has_author_signature:
raise SyncException(EXCEPTION_AUTHOR_SIGNATURE.format(content_str))
return signatures
@staticmethod
def _attachment_to_xml(tag_attachments, tag_refs, attachment):
if tag_attachments is not None:
attachment_tag = et.SubElement(tag_attachments, TAG_ATTACHMENT)
if attachment.is_internal():
mimetype = get_file_mimetype(attachment.file.file)
attachment_tag.set(ATTR_MIMETYPE, mimetype)
attachment_tag.set(ATTR_ID_TYPE, ID_TYPE_MD5)
attachment_tag.text = attachment.hash
else:
attachment_tag.set(ATTR_ID_TYPE, ID_TYPE_URL)
attachment_tag.text = attachment.url
if tag_refs is not None and attachment.is_internal():
attachment_ref = et.SubElement(tag_refs, TAG_ATTACHMENT_REF)
attachment_ref.set(ATTR_REF, attachment.hash)
attachment_ref.set(ATTR_URL, attachment.file.url)
@staticmethod
def generate_request_get(global_id_list: list):
"""
Form a get request from a list of ModelId objects.
"""
request = et.Element(TAG_REQUEST)
request.set(ATTR_TYPE, TYPE_GET)
request.set(ATTR_VERSION, '1.0')
model = et.SubElement(request, TAG_MODEL)
model.set(ATTR_VERSION, '1.0')
model.set(ATTR_NAME, 'post')
for global_id in global_id_list:
tag_id = et.SubElement(model, TAG_ID)
global_id.to_xml_element(tag_id)
return et.tostring(request, 'unicode')
@staticmethod
def generate_request_list(opening_post=None, tags=list(),
timestamp_from=None):
"""
Form a pull request from a list of ModelId objects.
"""
request = et.Element(TAG_REQUEST)
request.set(ATTR_TYPE, TYPE_LIST)
request.set(ATTR_VERSION, '1.0')
model = et.SubElement(request, TAG_MODEL)
model.set(ATTR_VERSION, CURRENT_MODEL_VERSION)
model.set(ATTR_NAME, 'post')
if opening_post:
ThreadFilter().add_filter(model, opening_post)
if tags:
TagsFilter().add_filter(model, tags)
if timestamp_from:
TimestampFromFilter().add_filter(model, timestamp_from)
return et.tostring(request, 'unicode')