# HG changeset patch
# User neko259
# Date 2017-01-11 19:48:10
# Node ID d33ed39f788e9de122c0e57a70f55f5b6731d0ba
# Parent ce41b3a5b55959956940ae614c515b3eba8f2d78
Do not rely on the md5 hash of the file, compare the file contents when searching for duplicate

diff --git a/boards/models/attachment/__init__.py b/boards/models/attachment/__init__.py
--- a/boards/models/attachment/__init__.py
+++ b/boards/models/attachment/__init__.py
@@ -1,3 +1,5 @@
+from itertools import zip_longest
+
 import boards
 from boards.models import STATUS_ARCHIVE
 from django.core.files.images import get_image_dimensions
@@ -12,10 +14,8 @@ from boards.utils import get_upload_file
 class AttachmentManager(models.Manager):
     def create_with_hash(self, file):
         file_hash = utils.get_file_hash(file)
-        existing = self.filter(hash=file_hash)
-        if len(existing) > 0:
-            attachment = existing[0]
-        else:
+        attachment = self._get_existing(file_hash, file)
+        if not attachment:
             # FIXME Use full mimetype here, need to modify viewers too
             file_type = get_extension(file.name)
             attachment = self.create(file=file, mimetype=file_type,
@@ -38,6 +38,33 @@ class AttachmentManager(models.Manager):
             images = images.filter(attachment_posts__threads__tags__in=tags)
         return images.order_by('?')[:count]
 
+    def _get_existing(self, file_hash, file):
+        """
+        Gets an attachment with the same file if one exists.
+
+        Candidates are looked up by hash and then confirmed by comparing
+        the file contents chunk by chunk, so a matching hash alone is not
+        enough to be treated as a duplicate.
+
+        file_hash is the hash used to select the candidate attachments.
+        file is the file whose contents are compared with each candidate.
+
+        Returns the first candidate whose contents are equal, or None if
+        there is no such candidate.
+        """
+        # NOTE(review): the comparison assumes both chunks() calls split
+        # the data at the same offsets -- confirm for every upload type.
+        for existing_attachment in self.filter(hash=file_hash):
+            chunk_pairs = zip_longest(file.chunks(),
+                                      existing_attachment.file.chunks())
+            # zip_longest pads the shorter stream with None, so files of
+            # different length compare as unequal instead of matching on
+            # their common prefix.
+            if all(chunk == existing_chunk
+                   for chunk, existing_chunk in chunk_pairs):
+                # Return the candidate that actually matched.
+                return existing_attachment
+        return None
+
 
 class Attachment(models.Model):
     objects = AttachmentManager()