##// END OF EJS Templates
Post URL loading optimizations
Post URL loading optimizations

File last commit:

r1624:bea11db5 default
r1668:b8867a5d default
Show More
sync.py
302 lines | 11.7 KiB | text/x-python | PythonLexer
neko259
Updated sync method for requesting and getting a post
r1177 import xml.etree.ElementTree as et
neko259
Set default query split to 10. Use logger to log post being parsed
r1624 import logging
neko259
Added images and attachments to the XML post response
r1506
neko259
Moved exceptions to a separate module
r1602 from boards.abstracts.exceptions import SyncException
from boards.models import KeyPair, GlobalId, Signature, Post, Tag
neko259
Download attached filed to the post during sync
r1511 from boards.models.attachment.downloaders import download
neko259
Validate attachment's hash after downloading
r1518 from boards.utils import get_file_mimetype, get_file_hash
neko259
Sync-import of a single post is working
r1229 from django.db import transaction
neko259
Updated sync method for requesting and getting a post
r1177
neko259
Check that parsed post has a signature of its author's key
# --- Messages passed to SyncException ---------------------------------------
EXCEPTION_NODE = 'Sync node returned an error: {}.'
EXCEPTION_OP = 'Load the OP first.'
EXCEPTION_DOWNLOAD = 'File was not downloaded.'
EXCEPTION_HASH = 'File hash does not match attachment hash.'
EXCEPTION_SIGNATURE = 'Invalid model signature for {}.'
EXCEPTION_AUTHOR_SIGNATURE = 'Model {} has no author signature.'

# Encoding argument given to et.tostring() so that it returns a str.
ENCODING_UNICODE = 'unicode'

# --- XML element names -------------------------------------------------------
TAG_MODEL = 'model'
TAG_REQUEST = 'request'
TAG_RESPONSE = 'response'
TAG_ID = 'id'
TAG_STATUS = 'status'
TAG_MODELS = 'models'
TAG_TITLE = 'title'
TAG_TEXT = 'text'
TAG_THREAD = 'thread'
TAG_PUB_TIME = 'pub-time'
TAG_SIGNATURES = 'signatures'
TAG_SIGNATURE = 'signature'
TAG_CONTENT = 'content'
TAG_ATTACHMENTS = 'attachments'
TAG_ATTACHMENT = 'attachment'
TAG_TAGS = 'tags'
TAG_TAG = 'tag'
TAG_ATTACHMENT_REFS = 'attachment-refs'
TAG_ATTACHMENT_REF = 'attachment-ref'
TAG_TRIPCODE = 'tripcode'
TAG_VERSION = 'version'

# Request type value (not referenced in this part of the module).
TYPE_GET = 'get'

# --- XML attribute names -----------------------------------------------------
ATTR_VERSION = 'version'
ATTR_TYPE = 'type'
ATTR_NAME = 'name'
ATTR_VALUE = 'value'
ATTR_MIMETYPE = 'mimetype'
ATTR_KEY = 'key'
ATTR_REF = 'ref'
ATTR_URL = 'url'
ATTR_ID_TYPE = 'id-type'

# Value written into the id-type attribute of an attachment element.
ID_TYPE_MD5 = 'md5'

# Status text of a successful response.
STATUS_SUCCESS = 'success'

logger = logging.getLogger('boards.sync')
neko259
Updated sync method for requesting and getting a post
class SyncManager:
    """
    Generates and parses the XML messages used to exchange posts with
    other sync nodes. All methods are static.
    """

    @staticmethod
    def generate_response_get(model_list: list):
        """
        Builds the XML response for a "get" request.

        Each post of model_list is written as a model element containing
        its content, the download references of its attachments and its
        signatures. The serialized content is cached in the post's global
        id, so it is built only when no cached content exists yet. If the
        global id has no stored signatures, one is created with the local
        key pair matching the global id's key and saved.

        Returns the response serialized as a string.
        """
        response = et.Element(TAG_RESPONSE)

        status = et.SubElement(response, TAG_STATUS)
        status.text = STATUS_SUCCESS

        models = et.SubElement(response, TAG_MODELS)

        for post in model_list:
            model = et.SubElement(models, TAG_MODEL)
            model.set(ATTR_NAME, 'post')

            global_id = post.global_id
            attachments = post.attachments.all()

            if global_id.content:
                # The cached content already lists the attachments; only
                # the download references have to be generated again.
                model.append(et.fromstring(global_id.content))
                if attachments:
                    attachment_refs = et.SubElement(
                        model, TAG_ATTACHMENT_REFS)
                    for attachment in attachments:
                        SyncManager._attachment_to_xml(
                            None, attachment_refs, attachment.file.file,
                            attachment.hash, attachment.file.url)
            else:
                content_tag = SyncManager._build_content_tag(
                    model, post, attachments)
                global_id.content = et.tostring(
                    content_tag, ENCODING_UNICODE)
                global_id.save()

            signatures_tag = et.SubElement(model, TAG_SIGNATURES)
            for signature in SyncManager._get_signatures(global_id):
                signature_tag = et.SubElement(signatures_tag, TAG_SIGNATURE)
                signature_tag.set(ATTR_TYPE, signature.key_type)
                signature_tag.set(ATTR_VALUE, signature.signature)
                signature_tag.set(ATTR_KEY, signature.key)

        return et.tostring(response, ENCODING_UNICODE)

    @staticmethod
    def _build_content_tag(model, post, attachments):
        """
        Creates the content element of a post inside the model element
        and returns it.

        The order of the child elements is kept fixed because the
        serialized content is the string that gets signed.
        """
        content_tag = et.SubElement(model, TAG_CONTENT)

        tag_id = et.SubElement(content_tag, TAG_ID)
        post.global_id.to_xml_element(tag_id)

        title = et.SubElement(content_tag, TAG_TITLE)
        title.text = post.title

        text = et.SubElement(content_tag, TAG_TEXT)
        text.text = post.get_sync_text()

        thread = post.get_thread()
        if post.is_opening():
            # Opening posts carry the tags of their thread
            tag_tags = et.SubElement(content_tag, TAG_TAGS)
            for tag in thread.get_tags():
                tag_tag = et.SubElement(tag_tags, TAG_TAG)
                tag_tag.text = tag.name
        else:
            # Replies reference the global id of the thread's opening post
            tag_thread = et.SubElement(content_tag, TAG_THREAD)
            thread_id = et.SubElement(tag_thread, TAG_ID)
            thread.get_opening_post().global_id.to_xml_element(thread_id)

        pub_time = et.SubElement(content_tag, TAG_PUB_TIME)
        pub_time.text = str(post.get_pub_time_str())

        if post.tripcode:
            tripcode = et.SubElement(content_tag, TAG_TRIPCODE)
            tripcode.text = post.tripcode

        if attachments:
            attachments_tag = et.SubElement(content_tag, TAG_ATTACHMENTS)
            attachment_refs = et.SubElement(model, TAG_ATTACHMENT_REFS)
            for attachment in attachments:
                SyncManager._attachment_to_xml(
                    attachments_tag, attachment_refs, attachment.file.file,
                    attachment.hash, attachment.file.url)

        version_tag = et.SubElement(content_tag, TAG_VERSION)
        version_tag.text = str(post.version)

        return content_tag

    @staticmethod
    def _get_signatures(global_id):
        """
        Returns the signatures stored for the global id. When there are
        none, signs the cached content with the key pair whose public key
        equals the global id's key, saves that signature and returns it
        as a single-item list.
        """
        post_signatures = global_id.signature_set.all()
        if post_signatures:
            return post_signatures

        key = KeyPair.objects.get(public_key=global_id.key)
        signature = Signature(
            key_type=key.key_type,
            key=key.public_key,
            signature=key.sign(global_id.content),
            global_id=global_id,
        )
        signature.save()
        return [signature]

    @staticmethod
    @transaction.atomic
    def parse_response_get(response_xml, hostname):
        """
        Parses the XML response of a "get" request and imports or updates
        every post it contains.

        response_xml is the response text, hostname is prepended to the
        attachment URLs found in the response before downloading them.
        The whole import runs inside one database transaction.

        Raises SyncException if the response status is not successful, if
        a signature check fails, if the opening post of a reply is not
        known locally, or if an attachment can not be downloaded or does
        not match its hash.
        """
        tag_root = et.fromstring(response_xml)

        tag_status = tag_root.find(TAG_STATUS)
        # A response without a status element is reported as a node error
        # instead of failing with an AttributeError.
        status_text = tag_status.text if tag_status is not None else None
        if status_text != STATUS_SUCCESS:
            raise SyncException(EXCEPTION_NODE.format(status_text))

        tag_models = tag_root.find(TAG_MODELS)
        if tag_models is None:
            return

        for tag_model in tag_models:
            SyncManager._parse_model(tag_model, hostname)

    @staticmethod
    def _parse_model(tag_model, hostname):
        """
        Verifies a single model element and imports it as a new post or
        updates the existing post when the received version is higher.
        """
        tag_content = tag_model.find(TAG_CONTENT)

        content_str = et.tostring(tag_content, ENCODING_UNICODE)

        tag_id = tag_content.find(TAG_ID)
        global_id, exists = GlobalId.from_xml_element(tag_id)
        signatures = SyncManager._verify_model(
            global_id, content_str, tag_model)

        version = int(tag_content.find(TAG_VERSION).text)
        # The local copy is outdated when the received version is higher
        is_old = exists and global_id.post.version < version
        if exists and not is_old:
            logger.debug('Post with same ID exists and is up to date.')
            return

        global_id.content = content_str
        global_id.save()
        for signature in signatures:
            signature.global_id = global_id
            signature.save()

        title = tag_content.find(TAG_TITLE).text or ''
        text = tag_content.find(TAG_TEXT).text or ''
        pub_time = tag_content.find(TAG_PUB_TIME).text

        tripcode_tag = tag_content.find(TAG_TRIPCODE)
        tripcode = ''
        if tripcode_tag is not None:
            tripcode = tripcode_tag.text or ''

        # Elements are compared with None explicitly: the truth value of
        # an Element depends on whether it has children.
        tag_thread = tag_content.find(TAG_THREAD)
        tags = []
        if tag_thread is not None:
            op_global_id, op_exists = GlobalId.from_xml_element(
                tag_thread.find(TAG_ID))
            if not op_exists:
                raise SyncException(EXCEPTION_OP)
            opening_post = Post.objects.get(global_id=op_global_id)
        else:
            opening_post = None
            tag_tags = tag_content.find(TAG_TAGS)
            if tag_tags is not None:
                for tag_tag in tag_tags:
                    tag, _ = Tag.objects.get_or_create(name=tag_tag.text)
                    tags.append(tag)

        # TODO Check that the replied posts are already present
        # before adding new ones

        files = SyncManager._download_attachments(
            tag_content, tag_model, hostname)

        if is_old:
            post = global_id.post
            Post.objects.update_post(
                post, title=title, text=text, pub_time=pub_time,
                tags=tags, files=files, tripcode=tripcode,
                version=version)
            logger.debug('Parsed updated post {}'.format(global_id))
        else:
            Post.objects.import_post(
                title=title, text=text, pub_time=pub_time,
                opening_post=opening_post, tags=tags,
                global_id=global_id, files=files, tripcode=tripcode,
                version=version)
            logger.debug('Parsed new post {}'.format(global_id))

    @staticmethod
    def _download_attachments(tag_content, tag_model, hostname):
        """
        Downloads every attachment listed in the content element and
        returns the downloaded files as a list.

        The URL of an attachment is taken from the attachment reference
        of the model element whose ref attribute equals the attachment's
        text (its hash). Raises SyncException if the reference or its URL
        is missing, if the download returns nothing, or if the hash of
        the downloaded file differs from the announced one.
        """
        tag_attachments = tag_content.find(TAG_ATTACHMENTS)
        if tag_attachments is None:
            return []

        # Index the references once. The lookup key comes from the remote
        # node, so it is compared as plain text rather than being
        # formatted into an XPath expression.
        ref_urls = {}
        tag_refs = tag_model.find(TAG_ATTACHMENT_REFS)
        if tag_refs is not None:
            for tag_ref in tag_refs.findall(TAG_ATTACHMENT_REF):
                ref = tag_ref.get(ATTR_REF)
                if ref is not None:
                    # The first reference wins when a ref is repeated
                    ref_urls.setdefault(ref, tag_ref.get(ATTR_URL))

        files = []
        for attachment in tag_attachments:
            url = ref_urls.get(attachment.text)
            if url is None:
                raise SyncException(EXCEPTION_DOWNLOAD)

            attached_file = download(hostname + url)
            if attached_file is None:
                raise SyncException(EXCEPTION_DOWNLOAD)

            file_hash = get_file_hash(attached_file)
            if file_hash != attachment.text:
                raise SyncException(EXCEPTION_HASH)

            files.append(attached_file)

        return files

    @staticmethod
    def generate_response_list():
        """
        Builds the XML response for a "list" request: one model element
        with the global id and the version for every post.

        Returns the response serialized as a string.
        """
        response = et.Element(TAG_RESPONSE)

        status = et.SubElement(response, TAG_STATUS)
        status.text = STATUS_SUCCESS

        models = et.SubElement(response, TAG_MODELS)

        for post in Post.objects.prefetch_related('global_id').all():
            tag_model = et.SubElement(models, TAG_MODEL)

            tag_id = et.SubElement(tag_model, TAG_ID)
            post.global_id.to_xml_element(tag_id)

            tag_version = et.SubElement(tag_model, TAG_VERSION)
            tag_version.text = str(post.version)

        return et.tostring(response, ENCODING_UNICODE)

    @staticmethod
    def _verify_model(global_id, content_str, tag_model):
        """
        Verifies all signatures for a single model.

        Every signature of the model element is checked against
        content_str, and at least one of them must have the key type and
        key of global_id (the author's signature).

        Returns the list of unsaved Signature objects. Raises
        SyncException if a signature is invalid or if there is no author
        signature, including when the model has no signatures element.
        """
        signatures = []

        tag_signatures = tag_model.find(TAG_SIGNATURES)
        if tag_signatures is None:
            raise SyncException(
                EXCEPTION_AUTHOR_SIGNATURE.format(content_str))

        has_author_signature = False
        for tag_signature in tag_signatures:
            signature_type = tag_signature.get(ATTR_TYPE)
            signature_value = tag_signature.get(ATTR_VALUE)
            signature_key = tag_signature.get(ATTR_KEY)

            if (global_id.key_type == signature_type
                    and global_id.key == signature_key):
                has_author_signature = True

            signature = Signature(key_type=signature_type,
                                  key=signature_key,
                                  signature=signature_value)

            if not KeyPair.objects.verify(signature, content_str):
                raise SyncException(EXCEPTION_SIGNATURE.format(content_str))

            signatures.append(signature)

        if not has_author_signature:
            raise SyncException(
                EXCEPTION_AUTHOR_SIGNATURE.format(content_str))

        return signatures

    @staticmethod
    def _attachment_to_xml(tag_attachments, tag_refs, file, hash, url):
        """
        Writes one attachment to XML.

        If tag_attachments is not None, an attachment element with the
        file's mimetype, the md5 id type and the hash as text is added to
        it. An attachment reference holding the hash and the download URL
        is always added to tag_refs.
        """
        if tag_attachments is not None:
            mimetype = get_file_mimetype(file)
            attachment = et.SubElement(tag_attachments, TAG_ATTACHMENT)
            attachment.set(ATTR_MIMETYPE, mimetype)
            attachment.set(ATTR_ID_TYPE, ID_TYPE_MD5)
            attachment.text = hash

        attachment_ref = et.SubElement(tag_refs, TAG_ATTACHMENT_REF)
        attachment_ref.set(ATTR_REF, hash)
        attachment_ref.set(ATTR_URL, url)