##// END OF EJS Templates
If several images with the same hash are found, use the identical one as the duplicate rather than the first one with that hash
If several images with the same hash are found, use the identical one as the duplicate rather than the first one with that hash

File last commit:

r1848:6a5309f4 default
r1856:969bd865 default
Show More
sync.py
385 lines | 14.1 KiB | text/x-python | PythonLexer
import xml.etree.ElementTree as et
import logging
from xml.etree import ElementTree
from boards.abstracts.exceptions import SyncException
from boards.abstracts.sync_filters import ThreadFilter, TagsFilter,\
TimestampFromFilter
from boards.models import KeyPair, GlobalId, Signature, Post, Tag
from boards.models.attachment.downloaders import download
from boards.models.signature import TAG_REQUEST, ATTR_TYPE, TYPE_GET, \
ATTR_VERSION, TAG_MODEL, ATTR_NAME, TAG_ID, TYPE_LIST
from boards.utils import get_file_mimetype, get_file_hash
from django.db import transaction
# --- Messages used when raising sync errors ---------------------------------
EXCEPTION_NODE = 'Sync node returned an error: {}.'
EXCEPTION_DOWNLOAD = 'File was not downloaded.'
EXCEPTION_HASH = 'File hash does not match attachment hash.'
EXCEPTION_SIGNATURE = 'Invalid model signature for {}.'
EXCEPTION_AUTHOR_SIGNATURE = 'Model {} has no author signature.'
EXCEPTION_THREAD = 'No thread exists for post {}'

# Encoding argument handed to ElementTree.tostring() so it returns text.
ENCODING_UNICODE = 'unicode'

# --- XML element names -------------------------------------------------------
# NOTE: TAG_MODEL, TAG_REQUEST and TAG_ID are also imported from
# boards.models.signature at the top of the file; the assignments below
# rebind those names in this module.
TAG_REQUEST = 'request'
TAG_RESPONSE = 'response'
TAG_STATUS = 'status'
TAG_MODELS = 'models'
TAG_MODEL = 'model'
TAG_ID = 'id'

# Elements found inside a post's <content>.
TAG_CONTENT = 'content'
TAG_TITLE = 'title'
TAG_TEXT = 'text'
TAG_THREAD = 'thread'
TAG_PUB_TIME = 'pub-time'
TAG_TRIPCODE = 'tripcode'
TAG_VERSION = 'version'
TAG_TAGS = 'tags'
TAG_TAG = 'tag'
TAG_ATTACHMENTS = 'attachments'
TAG_ATTACHMENT = 'attachment'

# Elements that sit next to <content> inside a <model>.
TAG_SIGNATURES = 'signatures'
TAG_SIGNATURE = 'signature'
TAG_ATTACHMENT_REFS = 'attachment-refs'
TAG_ATTACHMENT_REF = 'attachment-ref'

# --- Request types -----------------------------------------------------------
# NOTE: TYPE_GET is also imported from boards.models.signature.
TYPE_GET = 'get'

# --- XML attribute names -----------------------------------------------------
# NOTE: ATTR_VERSION, ATTR_TYPE and ATTR_NAME are also imported from
# boards.models.signature.
ATTR_VERSION = 'version'
ATTR_TYPE = 'type'
ATTR_NAME = 'name'
ATTR_VALUE = 'value'
ATTR_MIMETYPE = 'mimetype'
ATTR_KEY = 'key'
ATTR_REF = 'ref'
ATTR_URL = 'url'
ATTR_ID_TYPE = 'id-type'

# --- Values of the "id-type" attribute of an <attachment> --------------------
ID_TYPE_MD5 = 'md5'
ID_TYPE_URL = 'url'

# --- Response status values --------------------------------------------------
STATUS_SUCCESS = 'success'

logger = logging.getLogger('boards.sync')
class SyncManager:
    """
    Generates and parses the XML messages used to synchronize posts
    between nodes.

    Every method is static, so the class is only used as a namespace.
    """

    @staticmethod
    def generate_response_get(model_list: list):
        """
        Build the response to a "get" request.

        One <model> element is emitted per post. It holds the post content,
        references to internally stored attachments (if any) and the
        signatures of the content. The serialized content is stored in
        ``global_id.content`` the first time it is generated and reused on
        later calls; when the global id has no signature yet, one is created
        with the local key pair and saved.

        :param model_list: iterable of posts to serialize
        :return: the response document serialized to a string
        """
        response = et.Element(TAG_RESPONSE)

        status = et.SubElement(response, TAG_STATUS)
        status.text = STATUS_SUCCESS

        models = et.SubElement(response, TAG_MODELS)

        for post in model_list:
            model = et.SubElement(models, TAG_MODEL)
            model.set(ATTR_NAME, 'post')

            global_id = post.global_id
            attachments = post.attachments.all()
            # Download references are only needed for attachments whose
            # file is stored on this node.
            has_internal = any(
                attachment.is_internal() for attachment in attachments)

            if global_id.content:
                # The content was serialized before; reuse it verbatim so it
                # still matches the stored signatures.
                model.append(et.fromstring(global_id.content))
                if has_internal:
                    attachment_refs = et.SubElement(
                        model, TAG_ATTACHMENT_REFS)
                    for attachment in attachments:
                        SyncManager._attachment_to_xml(
                            None, attachment_refs, attachment)
            else:
                content_tag = et.SubElement(model, TAG_CONTENT)

                tag_id = et.SubElement(content_tag, TAG_ID)
                global_id.to_xml_element(tag_id)

                title = et.SubElement(content_tag, TAG_TITLE)
                title.text = post.title

                text = et.SubElement(content_tag, TAG_TEXT)
                text.text = post.get_sync_text()

                thread = post.get_thread()
                if post.is_opening():
                    # Opening posts carry the thread tags...
                    tag_tags = et.SubElement(content_tag, TAG_TAGS)
                    for tag in thread.get_tags():
                        tag_tag = et.SubElement(tag_tags, TAG_TAG)
                        tag_tag.text = tag.name
                else:
                    # ...replies point to the opening post of their thread.
                    tag_thread = et.SubElement(content_tag, TAG_THREAD)
                    thread_id = et.SubElement(tag_thread, TAG_ID)
                    thread.get_opening_post().global_id.to_xml_element(
                        thread_id)

                pub_time = et.SubElement(content_tag, TAG_PUB_TIME)
                pub_time.text = str(post.get_pub_time_str())

                if post.tripcode:
                    tripcode = et.SubElement(content_tag, TAG_TRIPCODE)
                    tripcode.text = post.tripcode

                if attachments:
                    attachments_tag = et.SubElement(
                        content_tag, TAG_ATTACHMENTS)
                    if has_internal:
                        attachment_refs = et.SubElement(
                            model, TAG_ATTACHMENT_REFS)
                    else:
                        attachment_refs = None

                    for attachment in attachments:
                        SyncManager._attachment_to_xml(
                            attachments_tag, attachment_refs, attachment)

                version_tag = et.SubElement(content_tag, TAG_VERSION)
                version_tag.text = str(post.version)

                # Remember the serialized content for later responses.
                global_id.content = et.tostring(content_tag, ENCODING_UNICODE)
                global_id.save()

            signatures_tag = et.SubElement(model, TAG_SIGNATURES)
            post_signatures = global_id.signature_set.all()
            if post_signatures:
                signatures = post_signatures
            else:
                # No signature stored yet: sign the content with the key pair
                # matching the global id and keep the result.
                key = KeyPair.objects.get(public_key=global_id.key)
                signature = Signature(
                    key_type=key.key_type,
                    key=key.public_key,
                    signature=key.sign(global_id.content),
                    global_id=global_id,
                )
                signature.save()
                signatures = [signature]

            for signature in signatures:
                signature_tag = et.SubElement(signatures_tag, TAG_SIGNATURE)
                signature_tag.set(ATTR_TYPE, signature.key_type)
                signature_tag.set(ATTR_VALUE, signature.signature)
                signature_tag.set(ATTR_KEY, signature.key)

        return et.tostring(response, ENCODING_UNICODE)

    @staticmethod
    def parse_response_get(response_xml, hostname):
        """
        Parse the response to a "get" request and import each model in it.

        :param response_xml: text of the response document
        :param hostname: prefix prepended to attachment URLs when the
            attachment files are downloaded
        :raises SyncException: if the response has no status or its status
            is not the success status
        """
        # NOTE(review): response_xml presumably comes from a remote node and
        # xml.etree is not hardened against maliciously constructed XML;
        # confirm the input is trusted or parse it with a hardened parser.
        tag_root = et.fromstring(response_xml)

        tag_status = tag_root.find(TAG_STATUS)
        # A response without a status element is reported as a node error
        # instead of failing with an AttributeError.
        status_text = tag_status.text if tag_status is not None else None
        if status_text != STATUS_SUCCESS:
            raise SyncException(EXCEPTION_NODE.format(status_text))

        tag_models = tag_root.find(TAG_MODELS)
        for tag_model in tag_models:
            SyncManager.parse_post(tag_model, hostname)

    @staticmethod
    @transaction.atomic
    def parse_post(tag_model, hostname):
        """
        Import or update a single post described by a <model> element.

        The signatures of the model are verified first. A post that already
        exists locally with a version that is not lower than the received
        one is left untouched; otherwise the post is created or updated and
        its internally stored attachments are downloaded and checked against
        their hashes. The whole method is wrapped in ``transaction.atomic``.

        :param tag_model: the <model> element of the post
        :param hostname: prefix prepended to attachment URLs when downloading
        :raises SyncException: on an invalid or missing author signature, a
            missing thread, a missing or failed attachment download, or an
            attachment hash mismatch
        """
        tag_content = tag_model.find(TAG_CONTENT)

        # The signatures are verified against this serialized form.
        content_str = et.tostring(tag_content, ENCODING_UNICODE)

        tag_id = tag_content.find(TAG_ID)
        global_id, exists = GlobalId.from_xml_element(tag_id)
        signatures = SyncManager._verify_model(
            global_id, content_str, tag_model)

        version = int(tag_content.find(TAG_VERSION).text)
        # The local copy is outdated when the received version is higher.
        is_old = exists and global_id.post.version < version
        if exists and not is_old:
            logger.debug('Post {} exists and is up to date.'.format(global_id))
            return

        global_id.content = content_str
        global_id.save()
        for signature in signatures:
            signature.global_id = global_id
            signature.save()

        title = tag_content.find(TAG_TITLE).text or ''
        text = tag_content.find(TAG_TEXT).text or ''
        pub_time = tag_content.find(TAG_PUB_TIME).text

        tripcode_tag = tag_content.find(TAG_TRIPCODE)
        if tripcode_tag is not None:
            tripcode = tripcode_tag.text or ''
        else:
            tripcode = ''

        tags = []
        tag_thread = tag_content.find(TAG_THREAD)
        # Compare with None explicitly: the truth value of an Element
        # depends on whether it has children and testing it is deprecated.
        if tag_thread is not None:
            # A reply: its thread must already be known locally.
            thread_id = tag_thread.find(TAG_ID)
            if thread_id is None:
                raise SyncException(EXCEPTION_THREAD.format(global_id))
            op_global_id, op_exists = GlobalId.from_xml_element(thread_id)
            if not op_exists:
                raise SyncException(EXCEPTION_THREAD.format(global_id))
            opening_post = Post.objects.get(global_id=op_global_id)
        else:
            # An opening post: it carries the tags of the thread.
            opening_post = None
            tag_tags = tag_content.find(TAG_TAGS)
            for tag_tag in tag_tags:
                tag, _ = Tag.objects.get_or_create(name=tag_tag.text)
                tags.append(tag)

        # TODO Check that the replied posts are already present
        # before adding new ones

        files = []
        urls = []
        tag_attachments = tag_content.find(TAG_ATTACHMENTS)
        if tag_attachments is None:
            tag_attachments = ()
        tag_refs = tag_model.find(TAG_ATTACHMENT_REFS)
        for attachment in tag_attachments:
            if attachment.get(ATTR_ID_TYPE) == ID_TYPE_URL:
                urls.append(attachment.text)
                continue

            # The attachment is identified by the hash of its file; the
            # matching reference tells where to download the file from.
            tag_ref = SyncManager._find_attachment_ref(
                tag_refs, attachment.text)
            url = tag_ref.get(ATTR_URL) if tag_ref is not None else None
            if url is None:
                raise SyncException(EXCEPTION_DOWNLOAD)

            attached_file = download(hostname + url, validate=False)
            if attached_file is None:
                raise SyncException(EXCEPTION_DOWNLOAD)

            file_hash = get_file_hash(attached_file)
            if file_hash != attachment.text:
                raise SyncException(EXCEPTION_HASH)

            files.append(attached_file)

        if is_old:
            post = global_id.post
            Post.objects.update_post(
                post, title=title, text=text, pub_time=pub_time,
                tags=tags, files=files, file_urls=urls,
                tripcode=tripcode, version=version)
            logger.debug('Parsed updated post {}'.format(global_id))
        else:
            Post.objects.import_post(
                title=title, text=text, pub_time=pub_time,
                opening_post=opening_post, tags=tags,
                global_id=global_id, files=files,
                file_urls=urls, tripcode=tripcode,
                version=version)
            logger.debug('Parsed new post {}'.format(global_id))

    @staticmethod
    def generate_response_list(filters):
        """
        Build the response to a "list" request.

        Each filter is applied in turn to the posts; every remaining post is
        listed with its global id and version.

        :param filters: iterable of objects with a ``filter(posts)`` method
        :return: the response document serialized to a string
        """
        response = et.Element(TAG_RESPONSE)

        status = et.SubElement(response, TAG_STATUS)
        status.text = STATUS_SUCCESS

        models = et.SubElement(response, TAG_MODELS)

        posts = Post.objects.prefetch_related('global_id')
        for post_filter in filters:
            posts = post_filter.filter(posts)

        for post in posts:
            tag_model = et.SubElement(models, TAG_MODEL)
            tag_id = et.SubElement(tag_model, TAG_ID)
            post.global_id.to_xml_element(tag_id)
            tag_version = et.SubElement(tag_model, TAG_VERSION)
            tag_version.text = str(post.version)

        return et.tostring(response, ENCODING_UNICODE)

    @staticmethod
    def _verify_model(global_id, content_str, tag_model):
        """
        Verifies all signatures for a single model.

        :param global_id: global id of the model; its key type and key
            identify the author signature
        :param content_str: serialized content the signatures are checked
            against
        :param tag_model: the <model> element holding the <signatures>
        :return: list of unsaved Signature objects, one per signature tag
        :raises SyncException: if any signature is invalid or none of them
            belongs to the author key
        """
        signatures = []

        tag_signatures = tag_model.find(TAG_SIGNATURES)
        if tag_signatures is None:
            # A model without a signatures element has no author signature.
            tag_signatures = ()

        has_author_signature = False
        for tag_signature in tag_signatures:
            signature_type = tag_signature.get(ATTR_TYPE)
            signature_value = tag_signature.get(ATTR_VALUE)
            signature_key = tag_signature.get(ATTR_KEY)

            if (global_id.key_type == signature_type
                    and global_id.key == signature_key):
                has_author_signature = True

            signature = Signature(key_type=signature_type,
                                  key=signature_key,
                                  signature=signature_value)

            if not KeyPair.objects.verify(signature, content_str):
                raise SyncException(EXCEPTION_SIGNATURE.format(content_str))

            signatures.append(signature)

        if not has_author_signature:
            raise SyncException(
                EXCEPTION_AUTHOR_SIGNATURE.format(content_str))

        return signatures

    @staticmethod
    def _find_attachment_ref(tag_refs, ref):
        """
        Return the first direct <attachment-ref> child of tag_refs whose
        "ref" attribute equals ref, or None if tag_refs is None or no child
        matches.

        The attribute is compared directly instead of being put into an
        XPath expression, so quotes in ref cannot break the lookup.
        """
        if tag_refs is None:
            return None
        for tag_ref in tag_refs.findall(TAG_ATTACHMENT_REF):
            if tag_ref.get(ATTR_REF) == ref:
                return tag_ref
        return None

    @staticmethod
    def _attachment_to_xml(tag_attachments, tag_refs, attachment):
        """
        Serialize one attachment.

        :param tag_attachments: element that receives an <attachment> child
            describing the attachment, or None to skip it
        :param tag_refs: element that receives an <attachment-ref> child
            with the file URL of an internal attachment, or None to skip it
        :param attachment: the attachment to serialize
        """
        is_internal = attachment.is_internal()

        if tag_attachments is not None:
            attachment_tag = et.SubElement(tag_attachments, TAG_ATTACHMENT)
            if is_internal:
                # Internal attachments are identified by their hash.
                mimetype = get_file_mimetype(attachment.file.file)
                attachment_tag.set(ATTR_MIMETYPE, mimetype)
                attachment_tag.set(ATTR_ID_TYPE, ID_TYPE_MD5)
                attachment_tag.text = attachment.hash
            else:
                # External attachments are identified by their URL.
                attachment_tag.set(ATTR_ID_TYPE, ID_TYPE_URL)
                attachment_tag.text = attachment.url

        if tag_refs is not None and is_internal:
            attachment_ref = et.SubElement(tag_refs, TAG_ATTACHMENT_REF)
            attachment_ref.set(ATTR_REF, attachment.hash)
            attachment_ref.set(ATTR_URL, attachment.file.url)

    @staticmethod
    def generate_request_get(global_id_list: list):
        """
        Form a get request from a list of ModelId objects.

        :param global_id_list: iterable of ids; each must provide
            ``to_xml_element(element)``
        :return: the request document serialized to a string
        """
        request = et.Element(TAG_REQUEST)
        request.set(ATTR_TYPE, TYPE_GET)
        request.set(ATTR_VERSION, '1.0')

        model = et.SubElement(request, TAG_MODEL)
        model.set(ATTR_VERSION, '1.0')
        model.set(ATTR_NAME, 'post')

        for global_id in global_id_list:
            tag_id = et.SubElement(model, TAG_ID)
            global_id.to_xml_element(tag_id)

        return et.tostring(request, ENCODING_UNICODE)

    @staticmethod
    def generate_request_list(opening_post=None, tags=None,
                              timestamp_from=None):
        """
        Form a list request, optionally restricted by filters.

        A filter is added to the request only for arguments that are truthy.

        :param opening_post: value passed to ThreadFilter
        :param tags: value passed to TagsFilter
        :param timestamp_from: value passed to TimestampFromFilter
        :return: the request document serialized to a string
        """
        request = et.Element(TAG_REQUEST)
        request.set(ATTR_TYPE, TYPE_LIST)
        request.set(ATTR_VERSION, '1.0')

        model = et.SubElement(request, TAG_MODEL)
        model.set(ATTR_VERSION, '1.0')
        model.set(ATTR_NAME, 'post')

        if opening_post:
            ThreadFilter().add_filter(model, opening_post)
        if tags:
            TagsFilter().add_filter(model, tags)
        if timestamp_from:
            TimestampFromFilter().add_filter(model, timestamp_from)

        return et.tostring(request, ENCODING_UNICODE)