##// END OF EJS Templates
Fixed tag parsing
Fixed tag parsing

File last commit:

r2104:2e6f7c21 default
r2104:2e6f7c21 default
Show More
sync.py
395 lines | 14.4 KiB | text/x-python | PythonLexer
import logging
import xml.etree.ElementTree as et
from django.db import transaction
from django.utils.dateparse import parse_datetime
from boards.abstracts.exceptions import SyncException
from boards.abstracts.sync_filters import ThreadFilter, TagsFilter, \
TimestampFromFilter
from boards.models import KeyPair, GlobalId, Signature, Post, Tag
from boards.models.attachment.downloaders import download
from boards.models.signature import TAG_REQUEST, ATTR_TYPE, TYPE_GET, \
ATTR_VERSION, TAG_MODEL, ATTR_NAME, TAG_ID, TYPE_LIST
from boards.utils import get_file_mimetype, get_file_hash
EXCEPTION_NODE = 'Sync node returned an error: {}.'
EXCEPTION_DOWNLOAD = 'File was not downloaded.'
EXCEPTION_HASH = 'File hash does not match attachment hash.'
EXCEPTION_SIGNATURE = 'Invalid model signature for {}.'
EXCEPTION_AUTHOR_SIGNATURE = 'Model {} has no author signature.'
EXCEPTION_THREAD = 'No thread exists for post {}'
ENCODING_UNICODE = 'unicode'
TAG_MODEL = 'model'
TAG_REQUEST = 'request'
TAG_RESPONSE = 'response'
TAG_ID = 'id'
TAG_STATUS = 'status'
TAG_MODELS = 'models'
TAG_TITLE = 'title'
TAG_TEXT = 'text'
TAG_THREAD = 'thread'
TAG_PUB_TIME = 'pub-time'
TAG_UPDATE_TIME = 'update-time'
TAG_SIGNATURES = 'signatures'
TAG_SIGNATURE = 'signature'
TAG_CONTENT = 'content'
TAG_ATTACHMENTS = 'attachments'
TAG_ATTACHMENT = 'attachment'
TAG_TAGS = 'tags'
TAG_TAG = 'tag'
TAG_ATTACHMENT_REFS = 'attachment-refs'
TAG_ATTACHMENT_REF = 'attachment-ref'
TAG_TRIPCODE = 'tripcode'
TAG_VERSION = 'version'
TYPE_GET = 'get'
ATTR_VERSION = 'version'
ATTR_TYPE = 'type'
ATTR_NAME = 'name'
ATTR_VALUE = 'value'
ATTR_MIMETYPE = 'mimetype'
ATTR_KEY = 'key'
ATTR_REF = 'ref'
ATTR_URL = 'url'
ATTR_ID_TYPE = 'id-type'
ID_TYPE_MD5 = 'md5'
ID_TYPE_URL = 'url'
STATUS_SUCCESS = 'success'
CURRENT_MODEL_VERSION = '1.1'
logger = logging.getLogger('boards.sync')
class SyncManager:
@staticmethod
def generate_response_get(model_list: list):
response = et.Element(TAG_RESPONSE)
status = et.SubElement(response, TAG_STATUS)
status.text = STATUS_SUCCESS
models = et.SubElement(response, TAG_MODELS)
for post in model_list:
model = et.SubElement(models, TAG_MODEL)
model.set(ATTR_NAME, 'post')
global_id = post.global_id
attachments = post.attachments.all()
if global_id.content:
model.append(et.fromstring(global_id.content))
if len(attachments) > 0:
internal_attachments = False
for attachment in attachments:
if attachment.is_internal():
internal_attachments = True
break
if internal_attachments:
attachment_refs = et.SubElement(model, TAG_ATTACHMENT_REFS)
for file in attachments:
SyncManager._attachment_to_xml(
None, attachment_refs, file)
else:
content_tag = et.SubElement(model, TAG_CONTENT)
tag_id = et.SubElement(content_tag, TAG_ID)
global_id.to_xml_element(tag_id)
title = et.SubElement(content_tag, TAG_TITLE)
title.text = post.title
text = et.SubElement(content_tag, TAG_TEXT)
text.text = post.get_sync_text()
thread = post.get_thread()
if post.is_opening():
tag_tags = et.SubElement(content_tag, TAG_TAGS)
for tag in thread.get_tags():
tag_tag = et.SubElement(tag_tags, TAG_TAG)
tag_tag.text = tag.get_name()
else:
tag_thread = et.SubElement(content_tag, TAG_THREAD)
thread_id = et.SubElement(tag_thread, TAG_ID)
thread.get_opening_post().global_id.to_xml_element(thread_id)
pub_time = et.SubElement(content_tag, TAG_PUB_TIME)
pub_time.text = str(post.get_pub_time_str())
update_time = et.SubElement(content_tag, TAG_UPDATE_TIME)
update_time.text = str(post.last_edit_time)
if post.tripcode:
tripcode = et.SubElement(content_tag, TAG_TRIPCODE)
tripcode.text = post.tripcode
if len(attachments) > 0:
attachments_tag = et.SubElement(content_tag, TAG_ATTACHMENTS)
internal_attachments = False
for attachment in attachments:
if attachment.is_internal():
internal_attachments = True
break
if internal_attachments:
attachment_refs = et.SubElement(model, TAG_ATTACHMENT_REFS)
else:
attachment_refs = None
for file in attachments:
SyncManager._attachment_to_xml(
attachments_tag, attachment_refs, file)
global_id.content = et.tostring(content_tag, ENCODING_UNICODE)
global_id.save()
signatures_tag = et.SubElement(model, TAG_SIGNATURES)
post_signatures = global_id.signature_set.all()
if post_signatures:
signatures = post_signatures
else:
key = KeyPair.objects.get(public_key=global_id.key)
signature = Signature(
key_type=key.key_type,
key=key.public_key,
signature=key.sign(global_id.content),
global_id=global_id,
)
signature.save()
signatures = [signature]
for signature in signatures:
signature_tag = et.SubElement(signatures_tag, TAG_SIGNATURE)
signature_tag.set(ATTR_TYPE, signature.key_type)
signature_tag.set(ATTR_VALUE, signature.signature)
signature_tag.set(ATTR_KEY, signature.key)
return et.tostring(response, ENCODING_UNICODE)
@staticmethod
def parse_response_get(response_xml, hostname):
tag_root = et.fromstring(response_xml)
tag_status = tag_root.find(TAG_STATUS)
if STATUS_SUCCESS == tag_status.text:
tag_models = tag_root.find(TAG_MODELS)
for tag_model in tag_models:
SyncManager.parse_post(tag_model, hostname)
else:
raise SyncException(EXCEPTION_NODE.format(tag_status.text))
@staticmethod
@transaction.atomic
def parse_post(tag_model, hostname):
tag_content = tag_model.find(TAG_CONTENT)
content_str = et.tostring(tag_content, ENCODING_UNICODE)
tag_id = tag_content.find(TAG_ID)
global_id, exists = GlobalId.from_xml_element(tag_id)
signatures = SyncManager._verify_model(global_id, content_str, tag_model)
pub_time = tag_content.find(TAG_PUB_TIME).text
tag_update_time = tag_content.find(TAG_UPDATE_TIME)
if tag_update_time:
update_time = tag_content.find(TAG_UPDATE_TIME).text
else:
update_time = pub_time
is_old = exists and global_id.post.last_edit_time < parse_datetime(update_time)
if exists and not is_old:
logger.debug('Post {} exists and is up to date.'.format(global_id))
else:
global_id.content = content_str
global_id.save()
for signature in signatures:
signature.global_id = global_id
signature.save()
title = tag_content.find(TAG_TITLE).text or ''
text = tag_content.find(TAG_TEXT).text or ''
tripcode_tag = tag_content.find(TAG_TRIPCODE)
if tripcode_tag is not None:
tripcode = tripcode_tag.text or ''
else:
tripcode = ''
thread = tag_content.find(TAG_THREAD)
tags = []
if thread:
thread_id = thread.find(TAG_ID)
op_global_id, exists = GlobalId.from_xml_element(thread_id)
if exists:
opening_post = Post.objects.get(global_id=op_global_id)
else:
raise Exception(EXCEPTION_THREAD.format(global_id))
else:
opening_post = None
tag_tags = tag_content.find(TAG_TAGS)
for tag_tag in tag_tags:
tag, created = Tag.objects.get_or_create_with_alias(
name=tag_tag.text)
tags.append(tag)
# TODO Check that the replied posts are already present
# before adding new ones
files = []
urls = []
tag_attachments = tag_content.find(TAG_ATTACHMENTS) or list()
tag_refs = tag_model.find(TAG_ATTACHMENT_REFS)
for attachment in tag_attachments:
if attachment.get(ATTR_ID_TYPE) == ID_TYPE_URL:
urls.append(attachment.text)
else:
tag_ref = tag_refs.find("{}[@ref='{}']".format(
TAG_ATTACHMENT_REF, attachment.text))
url = tag_ref.get(ATTR_URL)
attached_file = download(hostname + url, validate=False)
if attached_file is None:
raise SyncException(EXCEPTION_DOWNLOAD)
hash = get_file_hash(attached_file)
if hash != attachment.text:
raise SyncException(EXCEPTION_HASH)
files.append(attached_file)
if is_old:
post = global_id.post
Post.objects.update_post(
post, title=title, text=text, pub_time=pub_time,
tags=tags, files=files, file_urls=urls,
tripcode=tripcode, version=version, last_edit_time=update_time)
logger.debug('Parsed updated post {}'.format(global_id))
else:
Post.objects.import_post(
title=title, text=text, pub_time=pub_time,
opening_post=opening_post, tags=tags,
global_id=global_id, files=files,
file_urls=urls, tripcode=tripcode, last_edit_time=update_time)
logger.debug('Parsed new post {}'.format(global_id))
@staticmethod
def generate_response_list(filters):
response = et.Element(TAG_RESPONSE)
status = et.SubElement(response, TAG_STATUS)
status.text = STATUS_SUCCESS
models = et.SubElement(response, TAG_MODELS)
posts = Post.objects.prefetch_related('global_id')
for post_filter in filters:
posts = post_filter.filter(posts)
for post in posts:
tag_model = et.SubElement(models, TAG_MODEL)
tag_id = et.SubElement(tag_model, TAG_ID)
post.global_id.to_xml_element(tag_id)
update_time = et.SubElement(tag_model, TAG_UPDATE_TIME)
update_time.text = str(post.last_edit_time)
return et.tostring(response, ENCODING_UNICODE)
@staticmethod
def _verify_model(global_id, content_str, tag_model):
"""
Verifies all signatures for a single model.
"""
signatures = []
tag_signatures = tag_model.find(TAG_SIGNATURES)
has_author_signature = False
for tag_signature in tag_signatures:
signature_type = tag_signature.get(ATTR_TYPE)
signature_value = tag_signature.get(ATTR_VALUE)
signature_key = tag_signature.get(ATTR_KEY)
if global_id.key_type == signature_type and\
global_id.key == signature_key:
has_author_signature = True
signature = Signature(key_type=signature_type,
key=signature_key,
signature=signature_value)
if not KeyPair.objects.verify(signature, content_str):
raise SyncException(EXCEPTION_SIGNATURE.format(content_str))
signatures.append(signature)
if not has_author_signature:
raise SyncException(EXCEPTION_AUTHOR_SIGNATURE.format(content_str))
return signatures
@staticmethod
def _attachment_to_xml(tag_attachments, tag_refs, attachment):
if tag_attachments is not None:
attachment_tag = et.SubElement(tag_attachments, TAG_ATTACHMENT)
if attachment.is_internal():
mimetype = get_file_mimetype(attachment.file.file)
attachment_tag.set(ATTR_MIMETYPE, mimetype)
attachment_tag.set(ATTR_ID_TYPE, ID_TYPE_MD5)
attachment_tag.text = attachment.hash
else:
attachment_tag.set(ATTR_ID_TYPE, ID_TYPE_URL)
attachment_tag.text = attachment.url
if tag_refs is not None and attachment.is_internal():
attachment_ref = et.SubElement(tag_refs, TAG_ATTACHMENT_REF)
attachment_ref.set(ATTR_REF, attachment.hash)
attachment_ref.set(ATTR_URL, attachment.file.url)
@staticmethod
def generate_request_get(global_id_list: list):
"""
Form a get request from a list of ModelId objects.
"""
request = et.Element(TAG_REQUEST)
request.set(ATTR_TYPE, TYPE_GET)
request.set(ATTR_VERSION, '1.0')
model = et.SubElement(request, TAG_MODEL)
model.set(ATTR_VERSION, '1.0')
model.set(ATTR_NAME, 'post')
for global_id in global_id_list:
tag_id = et.SubElement(model, TAG_ID)
global_id.to_xml_element(tag_id)
return et.tostring(request, 'unicode')
@staticmethod
def generate_request_list(opening_post=None, tags=list(),
timestamp_from=None):
"""
Form a pull request from a list of ModelId objects.
"""
request = et.Element(TAG_REQUEST)
request.set(ATTR_TYPE, TYPE_LIST)
request.set(ATTR_VERSION, '1.0')
model = et.SubElement(request, TAG_MODEL)
model.set(ATTR_VERSION, CURRENT_MODEL_VERSION)
model.set(ATTR_NAME, 'post')
if opening_post:
ThreadFilter().add_filter(model, opening_post)
if tags:
TagsFilter().add_filter(model, tags)
if timestamp_from:
TimestampFromFilter().add_filter(model, timestamp_from)
return et.tostring(request, 'unicode')