##// END OF EJS Templates
Fixed tag parsing
Fixed tag parsing

File last commit:

r2104:2e6f7c21 default
r2104:2e6f7c21 default
Show More
sync.py
395 lines | 14.4 KiB | text/x-python | PythonLexer
neko259
Use update-time of a post instead of version
r1928 import logging
neko259
Updated sync method for requesting and getting a post
r1177 import xml.etree.ElementTree as et
neko259
Use update-time of a post instead of version
r1928
from django.db import transaction
from django.utils.dateparse import parse_datetime
neko259
Added images and attachments to the XML post response
r1506
neko259
Moved exceptions to a separate module
r1602 from boards.abstracts.exceptions import SyncException
neko259
Use update-time of a post instead of version
r1928 from boards.abstracts.sync_filters import ThreadFilter, TagsFilter, \
TimestampFromFilter
neko259
Moved exceptions to a separate module
r1602 from boards.models import KeyPair, GlobalId, Signature, Post, Tag
neko259
Download attached filed to the post during sync
r1511 from boards.models.attachment.downloaders import download
neko259
Added ability to filter posts in the LIST request
r1834 from boards.models.signature import TAG_REQUEST, ATTR_TYPE, TYPE_GET, \
ATTR_VERSION, TAG_MODEL, ATTR_NAME, TAG_ID, TYPE_LIST
neko259
Validate attachment's hash after downloading
r1518 from boards.utils import get_file_mimetype, get_file_hash
neko259
Updated sync method for requesting and getting a post
r1177
neko259
Check that parsed post has a signature of its author's key
# Messages for the exceptions raised while syncing.
EXCEPTION_NODE = 'Sync node returned an error: {}.'
EXCEPTION_DOWNLOAD = 'File was not downloaded.'
EXCEPTION_HASH = 'File hash does not match attachment hash.'
EXCEPTION_SIGNATURE = 'Invalid model signature for {}.'
EXCEPTION_AUTHOR_SIGNATURE = 'Model {} has no author signature.'
EXCEPTION_THREAD = 'No thread exists for post {}'

# Encoding argument passed to et.tostring() so that it returns str.
ENCODING_UNICODE = 'unicode'

# XML tag names.
# NOTE: TAG_MODEL, TAG_REQUEST, TAG_ID, TYPE_GET, ATTR_VERSION, ATTR_TYPE and
# ATTR_NAME are also imported from boards.models.signature at the top of this
# file; the definitions below rebind those names for this module.
TAG_MODEL = 'model'
TAG_REQUEST = 'request'
TAG_RESPONSE = 'response'
TAG_ID = 'id'
TAG_STATUS = 'status'
TAG_MODELS = 'models'
TAG_TITLE = 'title'
TAG_TEXT = 'text'
TAG_THREAD = 'thread'
TAG_PUB_TIME = 'pub-time'
TAG_UPDATE_TIME = 'update-time'
TAG_SIGNATURES = 'signatures'
TAG_SIGNATURE = 'signature'
TAG_CONTENT = 'content'
TAG_ATTACHMENTS = 'attachments'
TAG_ATTACHMENT = 'attachment'
TAG_TAGS = 'tags'
TAG_TAG = 'tag'
TAG_ATTACHMENT_REFS = 'attachment-refs'
TAG_ATTACHMENT_REF = 'attachment-ref'
TAG_TRIPCODE = 'tripcode'
TAG_VERSION = 'version'

# Request types.
TYPE_GET = 'get'

# XML attribute names.
ATTR_VERSION = 'version'
ATTR_TYPE = 'type'
ATTR_NAME = 'name'
ATTR_VALUE = 'value'
ATTR_MIMETYPE = 'mimetype'
ATTR_KEY = 'key'
ATTR_REF = 'ref'
ATTR_URL = 'url'
ATTR_ID_TYPE = 'id-type'

# Values of the id-type attribute of an attachment tag.
ID_TYPE_MD5 = 'md5'
ID_TYPE_URL = 'url'

# Text of the status tag in a successful response.
STATUS_SUCCESS = 'success'

# Model version written into LIST requests.
CURRENT_MODEL_VERSION = '1.1'

logger = logging.getLogger('boards.sync')
neko259
Updated sync method for requesting and getting a post
class SyncManager:
    """
    Builds and parses the XML requests and responses used to sync posts.

    All methods are static; the class only serves as a namespace.
    """

    @staticmethod
    def _has_internal_attachments(attachments):
        """
        Tell whether at least one of the attachments is an internal one.
        """
        return any(attachment.is_internal() for attachment in attachments)

    @staticmethod
    def generate_response_get(model_list: list):
        """
        Build the XML response of a GET request for the given posts.

        For each post the content tag is taken from the content stored on
        its global ID when present; otherwise it is built from the post,
        stored on the global ID and saved. If the global ID has no stored
        signatures, one is created with the key pair matching the global
        ID's key and saved.

        Returns the response serialized to a string.
        """
        response = et.Element(TAG_RESPONSE)

        status = et.SubElement(response, TAG_STATUS)
        status.text = STATUS_SUCCESS

        models = et.SubElement(response, TAG_MODELS)

        for post in model_list:
            model = et.SubElement(models, TAG_MODEL)
            model.set(ATTR_NAME, 'post')

            global_id = post.global_id
            attachments = post.attachments.all()

            if global_id.content:
                # Reuse the stored content; only the download references
                # have to be generated again.
                model.append(et.fromstring(global_id.content))
                if SyncManager._has_internal_attachments(attachments):
                    attachment_refs = et.SubElement(
                        model, TAG_ATTACHMENT_REFS)
                    for attachment in attachments:
                        SyncManager._attachment_to_xml(
                            None, attachment_refs, attachment)
            else:
                content_tag = et.SubElement(model, TAG_CONTENT)

                tag_id = et.SubElement(content_tag, TAG_ID)
                global_id.to_xml_element(tag_id)

                title = et.SubElement(content_tag, TAG_TITLE)
                title.text = post.title

                text = et.SubElement(content_tag, TAG_TEXT)
                text.text = post.get_sync_text()

                thread = post.get_thread()
                if post.is_opening():
                    # Opening posts carry the thread tags.
                    tag_tags = et.SubElement(content_tag, TAG_TAGS)
                    for tag in thread.get_tags():
                        tag_tag = et.SubElement(tag_tags, TAG_TAG)
                        tag_tag.text = tag.get_name()
                else:
                    # Other posts reference the opening post of the thread.
                    tag_thread = et.SubElement(content_tag, TAG_THREAD)
                    thread_id = et.SubElement(tag_thread, TAG_ID)
                    thread.get_opening_post().global_id.to_xml_element(
                        thread_id)

                pub_time = et.SubElement(content_tag, TAG_PUB_TIME)
                pub_time.text = str(post.get_pub_time_str())

                update_time = et.SubElement(content_tag, TAG_UPDATE_TIME)
                update_time.text = str(post.last_edit_time)

                if post.tripcode:
                    tripcode = et.SubElement(content_tag, TAG_TRIPCODE)
                    tripcode.text = post.tripcode

                if len(attachments) > 0:
                    attachments_tag = et.SubElement(
                        content_tag, TAG_ATTACHMENTS)

                    if SyncManager._has_internal_attachments(attachments):
                        attachment_refs = et.SubElement(
                            model, TAG_ATTACHMENT_REFS)
                    else:
                        attachment_refs = None

                    for attachment in attachments:
                        SyncManager._attachment_to_xml(
                            attachments_tag, attachment_refs, attachment)

                # The attachment refs are added to the model tag, not to
                # the content tag, so they are not part of stored content.
                global_id.content = et.tostring(content_tag, ENCODING_UNICODE)
                global_id.save()

            signatures_tag = et.SubElement(model, TAG_SIGNATURES)
            post_signatures = global_id.signature_set.all()
            if post_signatures:
                signatures = post_signatures
            else:
                key = KeyPair.objects.get(public_key=global_id.key)
                signature = Signature(
                    key_type=key.key_type,
                    key=key.public_key,
                    signature=key.sign(global_id.content),
                    global_id=global_id,
                )
                signature.save()
                signatures = [signature]

            for signature in signatures:
                signature_tag = et.SubElement(signatures_tag, TAG_SIGNATURE)
                signature_tag.set(ATTR_TYPE, signature.key_type)
                signature_tag.set(ATTR_VALUE, signature.signature)
                signature_tag.set(ATTR_KEY, signature.key)

        return et.tostring(response, ENCODING_UNICODE)

    @staticmethod
    def parse_response_get(response_xml, hostname):
        """
        Parse the XML response of a GET request and parse every model in it.

        Raises SyncException when the status of the response is not the
        success one (including a response without a status tag).
        """
        tag_root = et.fromstring(response_xml)
        tag_status = tag_root.find(TAG_STATUS)
        status = tag_status.text if tag_status is not None else None

        if status != STATUS_SUCCESS:
            raise SyncException(EXCEPTION_NODE.format(status))

        tag_models = tag_root.find(TAG_MODELS)
        for tag_model in tag_models:
            SyncManager.parse_post(tag_model, hostname)

    @staticmethod
    @transaction.atomic
    def parse_post(tag_model, hostname):
        """
        Parse a single model tag and import or update the post it describes.

        The signatures of the model are verified first. A post that already
        exists and is not older than the received one is left untouched.
        Attachments that are not URLs are downloaded from the hostname and
        their hash is compared with the one declared in the XML.

        Raises SyncException when a signature is invalid, a file cannot be
        downloaded or a file hash does not match. Raises Exception when the
        post references a thread whose opening post is not known.
        """
        tag_content = tag_model.find(TAG_CONTENT)
        content_str = et.tostring(tag_content, ENCODING_UNICODE)

        tag_id = tag_content.find(TAG_ID)
        global_id, exists = GlobalId.from_xml_element(tag_id)
        signatures = SyncManager._verify_model(
            global_id, content_str, tag_model)

        pub_time = tag_content.find(TAG_PUB_TIME).text
        tag_update_time = tag_content.find(TAG_UPDATE_TIME)
        # An Element without children is falsy, so the presence of the tag
        # has to be checked with "is not None".
        if tag_update_time is not None:
            update_time = tag_update_time.text
        else:
            update_time = pub_time

        is_old = exists and \
            global_id.post.last_edit_time < parse_datetime(update_time)
        if exists and not is_old:
            logger.debug('Post {} exists and is up to date.'.format(global_id))
            return

        global_id.content = content_str
        global_id.save()
        for signature in signatures:
            signature.global_id = global_id
            signature.save()

        title = tag_content.find(TAG_TITLE).text or ''
        text = tag_content.find(TAG_TEXT).text or ''

        tripcode_tag = tag_content.find(TAG_TRIPCODE)
        if tripcode_tag is not None:
            tripcode = tripcode_tag.text or ''
        else:
            tripcode = ''

        thread = tag_content.find(TAG_THREAD)
        tags = []
        if thread is not None:
            thread_id = thread.find(TAG_ID)
            op_global_id, op_exists = GlobalId.from_xml_element(thread_id)
            if not op_exists:
                raise Exception(EXCEPTION_THREAD.format(global_id))
            opening_post = Post.objects.get(global_id=op_global_id)
        else:
            # No thread reference: the post is treated as an opening one
            # and its tags are read.
            opening_post = None
            tag_tags = tag_content.find(TAG_TAGS)
            if tag_tags is not None:
                for tag_tag in tag_tags:
                    tag, created = Tag.objects.get_or_create_with_alias(
                        name=tag_tag.text)
                    tags.append(tag)

        # TODO Check that the replied posts are already present
        # before adding new ones

        files = []
        urls = []
        tag_attachments = tag_content.find(TAG_ATTACHMENTS)
        if tag_attachments is None:
            tag_attachments = ()
        tag_refs = tag_model.find(TAG_ATTACHMENT_REFS)
        for attachment in tag_attachments:
            if attachment.get(ATTR_ID_TYPE) == ID_TYPE_URL:
                urls.append(attachment.text)
                continue

            tag_ref = None
            if tag_refs is not None:
                tag_ref = tag_refs.find("{}[@ref='{}']".format(
                    TAG_ATTACHMENT_REF, attachment.text))
            if tag_ref is None:
                # Without a reference there is no URL to download from.
                raise SyncException(EXCEPTION_DOWNLOAD)

            url = tag_ref.get(ATTR_URL)
            attached_file = download(hostname + url, validate=False)
            if attached_file is None:
                raise SyncException(EXCEPTION_DOWNLOAD)

            file_hash = get_file_hash(attached_file)
            if file_hash != attachment.text:
                raise SyncException(EXCEPTION_HASH)

            files.append(attached_file)

        if is_old:
            post = global_id.post
            # NOTE(review): a "version=version" argument used to be passed
            # here, but no "version" name exists in this function, so the
            # call always failed with NameError. Confirm that update_post
            # does not require it.
            Post.objects.update_post(
                post, title=title, text=text, pub_time=pub_time,
                tags=tags, files=files, file_urls=urls,
                tripcode=tripcode, last_edit_time=update_time)
            logger.debug('Parsed updated post {}'.format(global_id))
        else:
            Post.objects.import_post(
                title=title, text=text, pub_time=pub_time,
                opening_post=opening_post, tags=tags,
                global_id=global_id, files=files,
                file_urls=urls, tripcode=tripcode,
                last_edit_time=update_time)
            logger.debug('Parsed new post {}'.format(global_id))

    @staticmethod
    def generate_response_list(filters):
        """
        Build the XML response of a LIST request.

        Every filter is applied in turn to the posts, and the global ID
        and the last edit time of each resulting post are written.

        Returns the response serialized to a string.
        """
        response = et.Element(TAG_RESPONSE)

        status = et.SubElement(response, TAG_STATUS)
        status.text = STATUS_SUCCESS

        models = et.SubElement(response, TAG_MODELS)

        posts = Post.objects.prefetch_related('global_id')
        for post_filter in filters:
            posts = post_filter.filter(posts)

        for post in posts:
            tag_model = et.SubElement(models, TAG_MODEL)
            tag_id = et.SubElement(tag_model, TAG_ID)
            post.global_id.to_xml_element(tag_id)
            update_time = et.SubElement(tag_model, TAG_UPDATE_TIME)
            update_time.text = str(post.last_edit_time)

        return et.tostring(response, ENCODING_UNICODE)

    @staticmethod
    def _verify_model(global_id, content_str, tag_model):
        """
        Verifies all signatures for a single model.

        Returns the list of unsaved Signature objects built from the XML.
        Raises SyncException when any signature fails verification or when
        none of them is made with the key of the global ID.
        """
        signatures = []

        tag_signatures = tag_model.find(TAG_SIGNATURES)
        if tag_signatures is None:
            # A model without a signatures tag has no author signature.
            tag_signatures = ()

        has_author_signature = False
        for tag_signature in tag_signatures:
            signature_type = tag_signature.get(ATTR_TYPE)
            signature_value = tag_signature.get(ATTR_VALUE)
            signature_key = tag_signature.get(ATTR_KEY)

            if global_id.key_type == signature_type and \
                    global_id.key == signature_key:
                has_author_signature = True

            signature = Signature(key_type=signature_type,
                                  key=signature_key,
                                  signature=signature_value)

            if not KeyPair.objects.verify(signature, content_str):
                raise SyncException(EXCEPTION_SIGNATURE.format(content_str))

            signatures.append(signature)

        if not has_author_signature:
            raise SyncException(
                EXCEPTION_AUTHOR_SIGNATURE.format(content_str))

        return signatures

    @staticmethod
    def _attachment_to_xml(tag_attachments, tag_refs, attachment):
        """
        Write an attachment into the attachments and attachment refs tags.

        Either of the tags may be None, in which case that part is skipped.
        Internal attachments are identified by their hash and get a
        reference with the file URL; other attachments are identified by
        their URL and get no reference.
        """
        if tag_attachments is not None:
            attachment_tag = et.SubElement(tag_attachments, TAG_ATTACHMENT)
            if attachment.is_internal():
                mimetype = get_file_mimetype(attachment.file.file)
                attachment_tag.set(ATTR_MIMETYPE, mimetype)
                attachment_tag.set(ATTR_ID_TYPE, ID_TYPE_MD5)
                attachment_tag.text = attachment.hash
            else:
                attachment_tag.set(ATTR_ID_TYPE, ID_TYPE_URL)
                attachment_tag.text = attachment.url

        if tag_refs is not None and attachment.is_internal():
            attachment_ref = et.SubElement(tag_refs, TAG_ATTACHMENT_REF)
            attachment_ref.set(ATTR_REF, attachment.hash)
            attachment_ref.set(ATTR_URL, attachment.file.url)

    @staticmethod
    def generate_request_get(global_id_list: list):
        """
        Form a get request from a list of ModelId objects.

        Returns the request serialized to a string.
        """
        request = et.Element(TAG_REQUEST)
        request.set(ATTR_TYPE, TYPE_GET)
        request.set(ATTR_VERSION, '1.0')

        model = et.SubElement(request, TAG_MODEL)
        model.set(ATTR_VERSION, '1.0')
        model.set(ATTR_NAME, 'post')

        for global_id in global_id_list:
            tag_id = et.SubElement(model, TAG_ID)
            global_id.to_xml_element(tag_id)

        return et.tostring(request, ENCODING_UNICODE)

    @staticmethod
    def generate_request_list(opening_post=None, tags=None,
                              timestamp_from=None):
        """
        Form a list request, optionally restricted by filters.

        A filter is added to the request for each of the opening post, the
        tags and the starting timestamp that is given and not empty.

        Returns the request serialized to a string.
        """
        request = et.Element(TAG_REQUEST)
        request.set(ATTR_TYPE, TYPE_LIST)
        request.set(ATTR_VERSION, '1.0')

        model = et.SubElement(request, TAG_MODEL)
        model.set(ATTR_VERSION, CURRENT_MODEL_VERSION)
        model.set(ATTR_NAME, 'post')

        if opening_post:
            ThreadFilter().add_filter(model, opening_post)
        if tags:
            TagsFilter().add_filter(model, tags)
        if timestamp_from:
            TimestampFromFilter().add_filter(model, timestamp_from)

        return et.tostring(request, ENCODING_UNICODE)