##// END OF EJS Templates
Deduplicated file hash calculation method
neko259 -
r1305:8d3d0486 default
parent child Browse files
Show More
@@ -1,71 +1,63 b''
1 1 import hashlib
2 2 import os
3 3 import time
4 4
5 5 from random import random
6 6
7 7 from django.db import models
8 from boards import utils
8 9
9 10 from boards.models.attachment.viewers import get_viewers, AbstractViewer
10 11
11 12
12 13 FILES_DIRECTORY = 'files/'
13 14 FILE_EXTENSION_DELIMITER = '.'
14 15
15 16
class AttachmentManager(models.Manager):
    """
    Manager that reuses an existing attachment when a file with the same
    content hash has been stored before.
    """

    def create_with_hash(self, file):
        """
        Gets the attachment whose hash equals the hash of the given file, or
        creates a new attachment if there is none.

        A new attachment gets the lower-cased text after the last extension
        delimiter of the file name as its mimetype.
        """

        file_hash = utils.get_file_hash(file)

        # first() asks the database for one row only instead of loading every
        # match to check that there is at least one.
        attachment = self.filter(hash=file_hash).first()
        if attachment is None:
            file_type = file.name.split(FILE_EXTENSION_DELIMITER)[-1].lower()
            attachment = Attachment.objects.create(
                file=file, mimetype=file_type, hash=file_hash)

        return attachment
37
38 30
class Attachment(models.Model):
    """
    Stored file together with its mimetype value and hash.
    """

    objects = AttachmentManager()

    # TODO Dedup the method
    def _update_filename(self, filename):
        """
        Builds the storage path of an uploaded file: the files directory plus
        a name made of the current timestamp, a random number and the text
        after the last extension delimiter of the original name.
        """

        # TODO Use something other than random number in file name
        timestamp = int(time.mktime(time.gmtime()))
        random_part = int(random() * 1000)
        extension = filename.split(FILE_EXTENSION_DELIMITER)[-1]
        new_name = '{}{}.{}'.format(timestamp, random_part, extension)

        return os.path.join(FILES_DIRECTORY, new_name)

    file = models.FileField(upload_to=_update_filename)
    mimetype = models.CharField(max_length=50)
    hash = models.CharField(max_length=36)

    def get_view(self):
        """
        Renders the attachment with the first viewer that supports its
        mimetype; AbstractViewer is used when no viewer does.
        """

        for viewer in get_viewers():
            if viewer.supports(self.mimetype):
                return viewer(self.file, self.mimetype).get_view()

        return AbstractViewer(self.file, self.mimetype).get_view()
70 62
71 63
@@ -1,121 +1,112 b''
1 1 import hashlib
2 2 import os
3 3 from random import random
4 4 import time
5 5
6 6 from django.db import models
7 7 from django.template.defaultfilters import filesizeformat
8 8
9 from boards import thumbs
9 from boards import thumbs, utils
10 10 import boards
11 11 from boards.models.base import Viewable
12 12
13 13 __author__ = 'neko259'
14 14
15 15
16 16 IMAGE_THUMB_SIZE = (200, 150)
17 17 IMAGES_DIRECTORY = 'images/'
18 18 FILE_EXTENSION_DELIMITER = '.'
19 19 HASH_LENGTH = 36
20 20
21 21 CSS_CLASS_IMAGE = 'image'
22 22 CSS_CLASS_THUMB = 'thumb'
23 23
24 24
class PostImageManager(models.Manager):
    """
    Manager that reuses an existing image when the same content has been
    uploaded before and that can pick random images.
    """

    def create_with_hash(self, image):
        """
        Gets the stored image whose hash equals the hash of the given image,
        or creates a new one if there is none.
        """

        image_hash = utils.get_file_hash(image)

        # first() asks the database for one row only instead of loading every
        # match to check that there is at least one.
        post_image = self.filter(hash=image_hash).first()
        if post_image is None:
            # The hash is not passed here, PostImage.save() calculates it for
            # objects that have no primary key yet.
            post_image = PostImage.objects.create(image=image)

        return post_image

    def get_random_images(self, count, include_archived=False, tags=None):
        """
        Gets up to count images in random order, optionally limited to the
        given tags.
        """

        # NOTE(review): include_archived is used as the exact value of the
        # archived flag, so True selects only archived threads instead of
        # adding them to the result. Confirm that this is intended.
        images = self.filter(post_images__thread__archived=include_archived)
        if tags is not None:
            # NOTE(review): this lookup goes through "threads" while the one
            # above goes through "thread". Confirm both relations exist.
            images = images.filter(post_images__threads__tags__in=tags)
        return images.order_by('?')[:count]
50 41
51 42
class PostImage(models.Model, Viewable):
    """
    Image with its full and preview dimensions and the hash of its content.
    """

    objects = PostImageManager()

    class Meta:
        app_label = 'boards'
        ordering = ('id',)

    def _update_image_filename(self, filename):
        """
        Builds the storage path of an uploaded image: the images directory
        plus a name made of the current timestamp, a random number and the
        text after the last extension delimiter of the original name.
        """

        # TODO Use something other than random number in file name
        timestamp = int(time.mktime(time.gmtime()))
        random_part = int(random() * 1000)
        extension = filename.split(FILE_EXTENSION_DELIMITER)[-1]
        new_name = '{}{}.{}'.format(timestamp, random_part, extension)

        return os.path.join(IMAGES_DIRECTORY, new_name)

    width = models.IntegerField(default=0)
    height = models.IntegerField(default=0)

    pre_width = models.IntegerField(default=0)
    pre_height = models.IntegerField(default=0)

    image = thumbs.ImageWithThumbsField(
        upload_to=_update_image_filename,
        blank=True,
        sizes=(IMAGE_THUMB_SIZE,),
        width_field='width',
        height_field='height',
        preview_width_field='pre_width',
        preview_height_field='pre_height')
    hash = models.CharField(max_length=HASH_LENGTH)

    def save(self, *args, **kwargs):
        """
        Stores the model. If the object has no primary key yet and has an
        image, the image hash used for deduplication is calculated first.
        """

        if not self.pk and self.image:
            self.hash = utils.get_file_hash(self.image)
        super().save(*args, **kwargs)

    def __str__(self):
        return self.image.url

    def get_view(self):
        """
        Renders the image as an HTML block: a link to the full image around
        the preview, followed by the file extension and size.
        """

        extension = self.image.name.split('.')[-1]
        metadata = '{}, {}'.format(extension, filesizeformat(self.image.size))

        template = (
            '<div class="{}">'
            '<a class="{}" href="{full}">'
            '<img class="post-image-preview"'
            ' src="{}"'
            ' alt="{}"'
            ' width="{}"'
            ' height="{}"'
            ' data-width="{}"'
            ' data-height="{}" />'
            '</a>'
            '<div class="image-metadata">{image_meta}</div>'
            '</div>'
        )

        return template.format(
            CSS_CLASS_IMAGE, CSS_CLASS_THUMB,
            self.image.url_200x150,
            str(self.hash), str(self.pre_width),
            str(self.pre_height), str(self.width), str(self.height),
            full=self.image.url, image_meta=metadata)

    def get_random_associated_post(self):
        """
        Gets a random post among the posts that contain this image.
        """

        matching_posts = boards.models.Post.objects.filter(images__in=[self])
        return matching_posts.order_by('?').first()
@@ -1,84 +1,92 b''
1 1 """
2 2 This module contains helper functions and helper classes.
3 3 """
4 import hashlib
4 5 import time
5 6 import hmac
6 7
7 8 from django.core.cache import cache
8 9 from django.db.models import Model
9 10
10 11 from django.utils import timezone
11 12
12 13 from neboard import settings
13 14
14 15
15 16 CACHE_KEY_DELIMITER = '_'
16 17 PERMISSION_MODERATE = 'moderation'
17 18
def get_client_ip(request):
    """
    Gets the client address of a request.

    The last entry of the X-Forwarded-For header is used if the header is
    present and the entry is not blank, otherwise the REMOTE_ADDR value is
    returned (None if it is missing too).
    """

    x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR')
    if x_forwarded_for:
        ip = x_forwarded_for.split(',')[-1].strip()
        # A header that ends with a comma or whitespace gives an empty entry,
        # which is not a usable address.
        if ip:
            return ip

    return request.META.get('REMOTE_ADDR')
25 26
26 27
# TODO The output format is not epoch because it includes microseconds
def datetime_to_epoch(datetime):
    """
    Converts a datetime to an integer number of microseconds.

    The datetime is moved to the current timezone, its time tuple is turned
    into seconds with time.mktime() and the microsecond part of the original
    value is added on top.
    """

    local_datetime = timezone.localtime(
        datetime, timezone.get_current_timezone())
    seconds = time.mktime(local_datetime.timetuple())

    return int(seconds * 1000000 + datetime.microsecond)
32 33
33 34
def get_websocket_token(user_id='', timestamp=''):
    """
    Create token to validate information provided by new connection.

    The token is the hex digest of an HMAC keyed with the Centrifuge project
    secret over the project id, the user id and the timestamp.
    """

    # The digest has to be named explicitly: hmac.new() raises TypeError
    # without it since Python 3.8. MD5 was the implicit default before, so
    # the produced tokens stay the same.
    sign = hmac.new(settings.CENTRIFUGE_PROJECT_SECRET.encode(),
                    digestmod=hashlib.md5)
    sign.update(settings.CENTRIFUGE_PROJECT_ID.encode())
    sign.update(user_id.encode())
    sign.update(timestamp.encode())

    return sign.hexdigest()
46 47
47 48
def cached_result(key_method=None):
    """
    Caches method result in the Django's cache system, keyed by the class
    name of the object, the method name and the model id if the object is a
    Django model.

    If key_method is given, it is called with the object and the values it
    returns are appended to the cache key.

    A result of None is calculated again on every call because it cannot be
    told apart from a missing cache entry.
    """
    import functools

    def _cached_result(function):
        # Keep the name and docstring of the wrapped method.
        @functools.wraps(function)
        def inner_func(obj, *args, **kwargs):
            # TODO Include method arguments to the cache key
            cache_key_params = [obj.__class__.__name__, function.__name__]
            if isinstance(obj, Model):
                cache_key_params.append(str(obj.id))

            if key_method is not None:
                cache_key_params += [str(arg) for arg in key_method(obj)]

            cache_key = CACHE_KEY_DELIMITER.join(cache_key_params)

            persisted_result = cache.get(cache_key)
            if persisted_result is not None:
                result = persisted_result
            else:
                result = function(obj, *args, **kwargs)
                cache.set(cache_key, result)

            return result

        return inner_func
    return _cached_result
76 77
77 78
def is_moderator(request):
    """
    Tells whether the user of the request has the moderation permission.

    False is returned if an AttributeError is raised during the check, for
    example when the request has no user.
    """

    try:
        return request.user.has_perm(PERMISSION_MODERATE)
    except AttributeError:
        return False
86
87
def get_file_hash(file) -> str:
    """
    Gets the MD5 hex digest of a file, reading it chunk by chunk through its
    chunks() method.
    """

    digest = hashlib.md5()
    for piece in file.chunks():
        digest.update(piece)

    return digest.hexdigest()
General Comments 0
You need to be logged in to leave comments. Login now