##// END OF EJS Templates
Fixed image search by google. Added duplicates search for images
Fixed image search by google. Added duplicates search for images

File last commit:

r1800:e64666cc default
r1802:5f0f8146 default
Show More
sync.py
331 lines | 12.4 KiB | text/x-python | PythonLexer
neko259
Updated sync method for requesting and getting a post
r1177 import xml.etree.ElementTree as et
neko259
Set default query split to 10. Use logger to log post being parsed
r1624 import logging
neko259
Added images and attachments to the XML post response
r1506
neko259
Moved exceptions to a separate module
r1602 from boards.abstracts.exceptions import SyncException
from boards.models import KeyPair, GlobalId, Signature, Post, Tag
neko259
Download attached files to the post during sync
r1511 from boards.models.attachment.downloaders import download
neko259
Validate attachment's hash after downloading
r1518 from boards.utils import get_file_mimetype, get_file_hash
neko259
Sync-import of a single post is working
r1229 from django.db import transaction
neko259
Updated sync method for requesting and getting a post
r1177
neko259
Check that parsed post has a signature of its author's key
# Messages used for SyncException instances raised by the sync manager.
EXCEPTION_NODE = 'Sync node returned an error: {}.'
EXCEPTION_DOWNLOAD = 'File was not downloaded.'
EXCEPTION_HASH = 'File hash does not match attachment hash.'
EXCEPTION_SIGNATURE = 'Invalid model signature for {}.'
EXCEPTION_AUTHOR_SIGNATURE = 'Model {} has no author signature.'

# Passed to ElementTree.tostring() so that it returns str instead of bytes.
ENCODING_UNICODE = 'unicode'

# XML element names of the sync protocol.
TAG_MODEL = 'model'
TAG_REQUEST = 'request'
TAG_RESPONSE = 'response'
TAG_ID = 'id'
TAG_STATUS = 'status'
TAG_MODELS = 'models'
TAG_TITLE = 'title'
TAG_TEXT = 'text'
TAG_THREAD = 'thread'
TAG_PUB_TIME = 'pub-time'
TAG_SIGNATURES = 'signatures'
TAG_SIGNATURE = 'signature'
TAG_CONTENT = 'content'
TAG_ATTACHMENTS = 'attachments'
TAG_ATTACHMENT = 'attachment'
TAG_TAGS = 'tags'
TAG_TAG = 'tag'
TAG_ATTACHMENT_REFS = 'attachment-refs'
TAG_ATTACHMENT_REF = 'attachment-ref'
TAG_TRIPCODE = 'tripcode'
TAG_VERSION = 'version'

# Request types.
TYPE_GET = 'get'

# XML attribute names of the sync protocol.
ATTR_VERSION = 'version'
ATTR_TYPE = 'type'
ATTR_NAME = 'name'
ATTR_VALUE = 'value'
ATTR_MIMETYPE = 'mimetype'
ATTR_KEY = 'key'
ATTR_REF = 'ref'
ATTR_URL = 'url'
ATTR_ID_TYPE = 'id-type'

# Values of the id-type attribute: how an attachment is identified.
ID_TYPE_MD5 = 'md5'
ID_TYPE_URL = 'url'

# Value of the status element for a successful response.
STATUS_SUCCESS = 'success'

logger = logging.getLogger('boards.sync')
neko259
Updated sync method for requesting and getting a post
class SyncManager:
    """
    Generates and parses the XML messages used to sync posts between nodes.

    All methods are static, the class only groups them together.
    """

    # Raised when a new reply refers to an opening post unknown to this node.
    _EXCEPTION_NO_THREAD = 'No thread exists for post {}.'

    @staticmethod
    def generate_response_get(model_list: list):
        """
        Builds the XML response with full data of the given posts.

        For every post the content tag cached in its global ID is reused
        when present; otherwise the content is built from the post, cached
        in the global ID and saved. If the global ID has no signatures yet,
        one is created with the key pair matching the global ID's key.

        :param model_list: posts to put into the response
        :return: the response XML as a string
        """

        response = et.Element(TAG_RESPONSE)

        status = et.SubElement(response, TAG_STATUS)
        status.text = STATUS_SUCCESS

        models = et.SubElement(response, TAG_MODELS)

        for post in model_list:
            model = et.SubElement(models, TAG_MODEL)
            model.set(ATTR_NAME, 'post')

            global_id = post.global_id
            attachments = post.attachments.all()

            if global_id.content:
                model.append(et.fromstring(global_id.content))
                # The cached content already lists the attachments, only
                # the download links have to be generated again.
                if SyncManager._has_internal_attachments(attachments):
                    attachment_refs = et.SubElement(
                        model, TAG_ATTACHMENT_REFS)
                    for file in attachments:
                        SyncManager._attachment_to_xml(
                            None, attachment_refs, file)
            else:
                content_tag = SyncManager._content_to_xml(
                    model, post, global_id, attachments)
                global_id.content = et.tostring(content_tag, ENCODING_UNICODE)
                global_id.save()

            signatures_tag = et.SubElement(model, TAG_SIGNATURES)
            signatures = global_id.signature_set.all()
            if not signatures:
                key = KeyPair.objects.get(public_key=global_id.key)
                signature = Signature(
                    key_type=key.key_type,
                    key=key.public_key,
                    signature=key.sign(global_id.content),
                    global_id=global_id,
                )
                signature.save()
                signatures = [signature]

            for signature in signatures:
                signature_tag = et.SubElement(signatures_tag, TAG_SIGNATURE)
                signature_tag.set(ATTR_TYPE, signature.key_type)
                signature_tag.set(ATTR_VALUE, signature.signature)
                signature_tag.set(ATTR_KEY, signature.key)

        return et.tostring(response, ENCODING_UNICODE)

    @staticmethod
    def parse_response_get(response_xml, hostname):
        """
        Parses a GET response and imports or updates every model in it.

        :param response_xml: the response XML string
        :param hostname: prefix added to attachment download URLs
        :raises SyncException: if the response status is not a success
        """

        tag_root = et.fromstring(response_xml)
        tag_status = tag_root.find(TAG_STATUS)
        # A response without a status tag is reported like a failed one
        # instead of crashing with an AttributeError.
        status_text = tag_status.text if tag_status is not None else None
        if STATUS_SUCCESS != status_text:
            raise SyncException(EXCEPTION_NODE.format(status_text))

        tag_models = tag_root.find(TAG_MODELS)
        if tag_models is None:
            return

        for tag_model in tag_models:
            SyncManager.parse_post(tag_model, hostname)

    @staticmethod
    @transaction.atomic
    def parse_post(tag_model, hostname):
        """
        Imports a new post or updates an existing one from its model tag.

        Signatures are verified first. A post that already exists with the
        same or a newer version is left untouched. The whole method runs in
        one transaction.

        :param tag_model: the model XML element of the post
        :param hostname: prefix added to attachment download URLs
        :raises SyncException: on invalid or missing signatures, failed
            attachment downloads, hash mismatches or when a new reply
            refers to a thread that does not exist locally
        """

        tag_content = tag_model.find(TAG_CONTENT)
        content_str = et.tostring(tag_content, ENCODING_UNICODE)

        tag_id = tag_content.find(TAG_ID)
        global_id, exists = GlobalId.from_xml_element(tag_id)
        signatures = SyncManager._verify_model(
            global_id, content_str, tag_model)

        version = int(tag_content.find(TAG_VERSION).text)
        is_old = exists and global_id.post.version < version
        if exists and not is_old:
            logger.debug('Post with same ID exists and is up to date.')
            return

        global_id.content = content_str
        global_id.save()
        for signature in signatures:
            signature.global_id = global_id
            signature.save()

        title = tag_content.find(TAG_TITLE).text or ''
        text = tag_content.find(TAG_TEXT).text or ''
        pub_time = tag_content.find(TAG_PUB_TIME).text

        tripcode_tag = tag_content.find(TAG_TRIPCODE)
        tripcode = ''
        if tripcode_tag is not None:
            tripcode = tripcode_tag.text or ''

        tags = []
        opening_post = None
        # Compare with None explicitly: truth value of an Element depends
        # on its children and testing it is deprecated.
        tag_thread = tag_content.find(TAG_THREAD)
        if tag_thread is not None:
            thread_id = tag_thread.find(TAG_ID)
            op_global_id, op_exists = GlobalId.from_xml_element(thread_id)
            if op_exists:
                opening_post = Post.objects.get(global_id=op_global_id)
            else:
                logger.debug('No thread exists for post {}'.format(global_id))
                # Only an import needs the opening post; fail with a sync
                # error instead of an unbound local variable.
                if not is_old:
                    raise SyncException(
                        SyncManager._EXCEPTION_NO_THREAD.format(global_id))
        else:
            tag_tags = tag_content.find(TAG_TAGS)
            if tag_tags is not None:
                for tag_tag in tag_tags:
                    tag, created = Tag.objects.get_or_create(
                        name=tag_tag.text)
                    tags.append(tag)

        # TODO Check that the replied posts are already present
        # before adding new ones

        files, urls = SyncManager._parse_attachments(
            tag_content, tag_model, hostname)

        if is_old:
            post = global_id.post
            Post.objects.update_post(
                post, title=title, text=text, pub_time=pub_time,
                tags=tags, files=files, file_urls=urls,
                tripcode=tripcode, version=version)
            logger.debug('Parsed updated post {}'.format(global_id))
        else:
            Post.objects.import_post(
                title=title, text=text, pub_time=pub_time,
                opening_post=opening_post, tags=tags,
                global_id=global_id, files=files,
                file_urls=urls, tripcode=tripcode,
                version=version)
            logger.debug('Parsed new post {}'.format(global_id))

    @staticmethod
    def generate_response_list():
        """
        Builds the XML response listing global ID and version of all posts.

        :return: the response XML as a string
        """

        response = et.Element(TAG_RESPONSE)

        status = et.SubElement(response, TAG_STATUS)
        status.text = STATUS_SUCCESS

        models = et.SubElement(response, TAG_MODELS)

        for post in Post.objects.prefetch_related('global_id').all():
            tag_model = et.SubElement(models, TAG_MODEL)

            tag_id = et.SubElement(tag_model, TAG_ID)
            post.global_id.to_xml_element(tag_id)

            tag_version = et.SubElement(tag_model, TAG_VERSION)
            tag_version.text = str(post.version)

        return et.tostring(response, ENCODING_UNICODE)

    @staticmethod
    def _verify_model(global_id, content_str, tag_model):
        """
        Verifies all signatures for a single model.

        :param global_id: global ID of the model, its key is the author's
        :param content_str: the content tag as a string, this is what the
            signatures are checked against
        :param tag_model: the model XML element holding the signatures
        :return: list of unsaved Signature objects, one per signature tag
        :raises SyncException: if any signature is invalid or none of them
            is made with the key of the global ID
        """

        signatures = []

        tag_signatures = tag_model.find(TAG_SIGNATURES)
        if tag_signatures is None:
            # No signatures tag means no author signature either.
            tag_signatures = ()

        has_author_signature = False
        for tag_signature in tag_signatures:
            signature_type = tag_signature.get(ATTR_TYPE)
            signature_value = tag_signature.get(ATTR_VALUE)
            signature_key = tag_signature.get(ATTR_KEY)

            if (global_id.key_type == signature_type
                    and global_id.key == signature_key):
                has_author_signature = True

            signature = Signature(key_type=signature_type,
                                  key=signature_key,
                                  signature=signature_value)

            if not KeyPair.objects.verify(signature, content_str):
                raise SyncException(EXCEPTION_SIGNATURE.format(content_str))

            signatures.append(signature)

        if not has_author_signature:
            raise SyncException(
                EXCEPTION_AUTHOR_SIGNATURE.format(content_str))

        return signatures

    @staticmethod
    def _attachment_to_xml(tag_attachments, tag_refs, attachment):
        """
        Adds the attachment to the attachments tag and to the refs tag.

        :param tag_attachments: parent for the attachment tag, None to skip
        :param tag_refs: parent for the download reference, None to skip
        :param attachment: the attachment to serialize
        """

        is_internal = attachment.is_internal()

        if tag_attachments is not None:
            attachment_tag = et.SubElement(tag_attachments, TAG_ATTACHMENT)
            if is_internal:
                mimetype = get_file_mimetype(attachment.file.file)
                attachment_tag.set(ATTR_MIMETYPE, mimetype)
                attachment_tag.set(ATTR_ID_TYPE, ID_TYPE_MD5)
                attachment_tag.text = attachment.hash
            else:
                attachment_tag.set(ATTR_ID_TYPE, ID_TYPE_URL)
                attachment_tag.text = attachment.url

        # NOTE(review): refs are written for internal attachments only.
        # URL attachments are identified by their url and are never looked
        # up in the refs when parsed; presumably they have no file, so
        # reading file.url for them would fail - confirm against the model.
        if tag_refs is not None and is_internal:
            attachment_ref = et.SubElement(tag_refs, TAG_ATTACHMENT_REF)
            attachment_ref.set(ATTR_REF, attachment.hash)
            attachment_ref.set(ATTR_URL, attachment.file.url)

    @staticmethod
    def _has_internal_attachments(attachments):
        """
        Tells if at least one of the attachments reports to be internal.
        """

        return any(attachment.is_internal() for attachment in attachments)

    @staticmethod
    def _content_to_xml(model, post, global_id, attachments):
        """
        Builds the content tag of the post inside the model tag.

        The attachment refs tag, when needed, is added to the model tag
        and not to the content.

        :return: the created content element
        """

        content_tag = et.SubElement(model, TAG_CONTENT)

        tag_id = et.SubElement(content_tag, TAG_ID)
        global_id.to_xml_element(tag_id)

        title = et.SubElement(content_tag, TAG_TITLE)
        title.text = post.title

        text = et.SubElement(content_tag, TAG_TEXT)
        text.text = post.get_sync_text()

        thread = post.get_thread()
        if post.is_opening():
            # Only the opening post carries the tags of the thread.
            tag_tags = et.SubElement(content_tag, TAG_TAGS)
            for tag in thread.get_tags():
                tag_tag = et.SubElement(tag_tags, TAG_TAG)
                tag_tag.text = tag.name
        else:
            # A reply refers to its thread by the opening post global ID.
            tag_thread = et.SubElement(content_tag, TAG_THREAD)
            thread_id = et.SubElement(tag_thread, TAG_ID)
            thread.get_opening_post().global_id.to_xml_element(thread_id)

        pub_time = et.SubElement(content_tag, TAG_PUB_TIME)
        pub_time.text = str(post.get_pub_time_str())

        if post.tripcode:
            tripcode = et.SubElement(content_tag, TAG_TRIPCODE)
            tripcode.text = post.tripcode

        if attachments:
            attachments_tag = et.SubElement(content_tag, TAG_ATTACHMENTS)

            attachment_refs = None
            if SyncManager._has_internal_attachments(attachments):
                attachment_refs = et.SubElement(model, TAG_ATTACHMENT_REFS)

            for file in attachments:
                SyncManager._attachment_to_xml(
                    attachments_tag, attachment_refs, file)

        version_tag = et.SubElement(content_tag, TAG_VERSION)
        version_tag.text = str(post.version)

        return content_tag

    @staticmethod
    def _parse_attachments(tag_content, tag_model, hostname):
        """
        Collects URL attachments and downloads file attachments of a post.

        :param tag_content: the content XML element listing attachments
        :param tag_model: the model XML element holding attachment refs
        :param hostname: prefix added to attachment download URLs
        :return: tuple of the downloaded files list and the urls list
        :raises SyncException: if a file has no download reference, was
            not downloaded or its hash differs from the declared one
        """

        files = []
        urls = []

        tag_attachments = tag_content.find(TAG_ATTACHMENTS)
        if tag_attachments is None:
            return files, urls

        # Map hashes to download urls by comparing attribute values, so
        # that text received from the remote node is never put into an
        # XPath expression. The first ref wins, like with find().
        ref_urls = {}
        tag_refs = tag_model.find(TAG_ATTACHMENT_REFS)
        if tag_refs is not None:
            for tag_ref in tag_refs.findall(TAG_ATTACHMENT_REF):
                ref_urls.setdefault(
                    tag_ref.get(ATTR_REF), tag_ref.get(ATTR_URL))

        for attachment in tag_attachments:
            if attachment.get(ATTR_ID_TYPE) == ID_TYPE_URL:
                urls.append(attachment.text)
                continue

            url = ref_urls.get(attachment.text)
            if url is None:
                raise SyncException(EXCEPTION_DOWNLOAD)

            attached_file = download(hostname + url)
            if attached_file is None:
                raise SyncException(EXCEPTION_DOWNLOAD)

            file_hash = get_file_hash(attached_file)
            if file_hash != attachment.text:
                raise SyncException(EXCEPTION_HASH)

            files.append(attached_file)

        return files, urls