##// END OF EJS Templates
Deduplicated file hash calculation method
neko259 -
r1305:8d3d0486 default
parent child Browse files
Show More
@@ -1,71 +1,63 b''
1 import hashlib
1 import hashlib
2 import os
2 import os
3 import time
3 import time
4
4
5 from random import random
5 from random import random
6
6
7 from django.db import models
7 from django.db import models
8 from boards import utils
8
9
9 from boards.models.attachment.viewers import get_viewers, AbstractViewer
10 from boards.models.attachment.viewers import get_viewers, AbstractViewer
10
11
11
12
12 FILES_DIRECTORY = 'files/'
13 FILES_DIRECTORY = 'files/'
13 FILE_EXTENSION_DELIMITER = '.'
14 FILE_EXTENSION_DELIMITER = '.'
14
15
15
16
16 class AttachmentManager(models.Manager):
17 class AttachmentManager(models.Manager):
17 def create_with_hash(self, file):
18 def create_with_hash(self, file):
18 file_hash = self.get_hash(file)
19 file_hash = utils.get_file_hash(file)
19 existing = self.filter(hash=file_hash)
20 existing = self.filter(hash=file_hash)
20 if len(existing) > 0:
21 if len(existing) > 0:
21 attachment = existing[0]
22 attachment = existing[0]
22 else:
23 else:
23 file_type = file.name.split(FILE_EXTENSION_DELIMITER)[-1].lower()
24 file_type = file.name.split(FILE_EXTENSION_DELIMITER)[-1].lower()
24 attachment = Attachment.objects.create(file=file,
25 attachment = Attachment.objects.create(
25 mimetype=file_type, hash=file_hash)
26 file=file, mimetype=file_type, hash=file_hash)
26
27
27 return attachment
28 return attachment
28
29
29 def get_hash(self, file):
30 """
31 Gets hash of a file.
32 """
33 md5 = hashlib.md5()
34 for chunk in file.chunks():
35 md5.update(chunk)
36 return md5.hexdigest()
37
38
30
39 class Attachment(models.Model):
31 class Attachment(models.Model):
40 objects = AttachmentManager()
32 objects = AttachmentManager()
41
33
42 # TODO Dedup the method
34 # TODO Dedup the method
43 def _update_filename(self, filename):
35 def _update_filename(self, filename):
44 """
36 """
45 Gets unique filename
37 Gets unique filename
46 """
38 """
47
39
48 # TODO Use something other than random number in file name
40 # TODO Use something other than random number in file name
49 new_name = '{}{}.{}'.format(
41 new_name = '{}{}.{}'.format(
50 str(int(time.mktime(time.gmtime()))),
42 str(int(time.mktime(time.gmtime()))),
51 str(int(random() * 1000)),
43 str(int(random() * 1000)),
52 filename.split(FILE_EXTENSION_DELIMITER)[-1:][0])
44 filename.split(FILE_EXTENSION_DELIMITER)[-1:][0])
53
45
54 return os.path.join(FILES_DIRECTORY, new_name)
46 return os.path.join(FILES_DIRECTORY, new_name)
55
47
56 file = models.FileField(upload_to=_update_filename)
48 file = models.FileField(upload_to=_update_filename)
57 mimetype = models.CharField(max_length=50)
49 mimetype = models.CharField(max_length=50)
58 hash = models.CharField(max_length=36)
50 hash = models.CharField(max_length=36)
59
51
60 def get_view(self):
52 def get_view(self):
61 file_viewer = None
53 file_viewer = None
62 for viewer in get_viewers():
54 for viewer in get_viewers():
63 if viewer.supports(self.mimetype):
55 if viewer.supports(self.mimetype):
64 file_viewer = viewer(self.file, self.mimetype)
56 file_viewer = viewer(self.file, self.mimetype)
65 break
57 break
66 if file_viewer is None:
58 if file_viewer is None:
67 file_viewer = AbstractViewer(self.file, self.mimetype)
59 file_viewer = AbstractViewer(self.file, self.mimetype)
68
60
69 return file_viewer.get_view()
61 return file_viewer.get_view()
70
62
71
63
@@ -1,121 +1,112 b''
1 import hashlib
1 import hashlib
2 import os
2 import os
3 from random import random
3 from random import random
4 import time
4 import time
5
5
6 from django.db import models
6 from django.db import models
7 from django.template.defaultfilters import filesizeformat
7 from django.template.defaultfilters import filesizeformat
8
8
9 from boards import thumbs
9 from boards import thumbs, utils
10 import boards
10 import boards
11 from boards.models.base import Viewable
11 from boards.models.base import Viewable
12
12
13 __author__ = 'neko259'
13 __author__ = 'neko259'
14
14
15
15
16 IMAGE_THUMB_SIZE = (200, 150)
16 IMAGE_THUMB_SIZE = (200, 150)
17 IMAGES_DIRECTORY = 'images/'
17 IMAGES_DIRECTORY = 'images/'
18 FILE_EXTENSION_DELIMITER = '.'
18 FILE_EXTENSION_DELIMITER = '.'
19 HASH_LENGTH = 36
19 HASH_LENGTH = 36
20
20
21 CSS_CLASS_IMAGE = 'image'
21 CSS_CLASS_IMAGE = 'image'
22 CSS_CLASS_THUMB = 'thumb'
22 CSS_CLASS_THUMB = 'thumb'
23
23
24
24
25 class PostImageManager(models.Manager):
25 class PostImageManager(models.Manager):
26 def create_with_hash(self, image):
26 def create_with_hash(self, image):
27 image_hash = self.get_hash(image)
27 image_hash = utils.get_file_hash(image)
28 existing = self.filter(hash=image_hash)
28 existing = self.filter(hash=image_hash)
29 if len(existing) > 0:
29 if len(existing) > 0:
30 post_image = existing[0]
30 post_image = existing[0]
31 else:
31 else:
32 post_image = PostImage.objects.create(image=image)
32 post_image = PostImage.objects.create(image=image)
33
33
34 return post_image
34 return post_image
35
35
36 def get_hash(self, image):
37 """
38 Gets hash of an image.
39 """
40 md5 = hashlib.md5()
41 for chunk in image.chunks():
42 md5.update(chunk)
43 return md5.hexdigest()
44
45 def get_random_images(self, count, include_archived=False, tags=None):
36 def get_random_images(self, count, include_archived=False, tags=None):
46 images = self.filter(post_images__thread__archived=include_archived)
37 images = self.filter(post_images__thread__archived=include_archived)
47 if tags is not None:
38 if tags is not None:
48 images = images.filter(post_images__threads__tags__in=tags)
39 images = images.filter(post_images__threads__tags__in=tags)
49 return images.order_by('?')[:count]
40 return images.order_by('?')[:count]
50
41
51
42
52 class PostImage(models.Model, Viewable):
43 class PostImage(models.Model, Viewable):
53 objects = PostImageManager()
44 objects = PostImageManager()
54
45
55 class Meta:
46 class Meta:
56 app_label = 'boards'
47 app_label = 'boards'
57 ordering = ('id',)
48 ordering = ('id',)
58
49
59 def _update_image_filename(self, filename):
50 def _update_image_filename(self, filename):
60 """
51 """
61 Gets unique image filename
52 Gets unique image filename
62 """
53 """
63
54
64 # TODO Use something other than random number in file name
55 # TODO Use something other than random number in file name
65 new_name = '{}{}.{}'.format(
56 new_name = '{}{}.{}'.format(
66 str(int(time.mktime(time.gmtime()))),
57 str(int(time.mktime(time.gmtime()))),
67 str(int(random() * 1000)),
58 str(int(random() * 1000)),
68 filename.split(FILE_EXTENSION_DELIMITER)[-1:][0])
59 filename.split(FILE_EXTENSION_DELIMITER)[-1:][0])
69
60
70 return os.path.join(IMAGES_DIRECTORY, new_name)
61 return os.path.join(IMAGES_DIRECTORY, new_name)
71
62
72 width = models.IntegerField(default=0)
63 width = models.IntegerField(default=0)
73 height = models.IntegerField(default=0)
64 height = models.IntegerField(default=0)
74
65
75 pre_width = models.IntegerField(default=0)
66 pre_width = models.IntegerField(default=0)
76 pre_height = models.IntegerField(default=0)
67 pre_height = models.IntegerField(default=0)
77
68
78 image = thumbs.ImageWithThumbsField(upload_to=_update_image_filename,
69 image = thumbs.ImageWithThumbsField(upload_to=_update_image_filename,
79 blank=True, sizes=(IMAGE_THUMB_SIZE,),
70 blank=True, sizes=(IMAGE_THUMB_SIZE,),
80 width_field='width',
71 width_field='width',
81 height_field='height',
72 height_field='height',
82 preview_width_field='pre_width',
73 preview_width_field='pre_width',
83 preview_height_field='pre_height')
74 preview_height_field='pre_height')
84 hash = models.CharField(max_length=HASH_LENGTH)
75 hash = models.CharField(max_length=HASH_LENGTH)
85
76
86 def save(self, *args, **kwargs):
77 def save(self, *args, **kwargs):
87 """
78 """
88 Saves the model and computes the image hash for deduplication purposes.
79 Saves the model and computes the image hash for deduplication purposes.
89 """
80 """
90
81
91 if not self.pk and self.image:
82 if not self.pk and self.image:
92 self.hash = PostImage.objects.get_hash(self.image)
83 self.hash = utils.get_file_hash(self.image)
93 super(PostImage, self).save(*args, **kwargs)
84 super(PostImage, self).save(*args, **kwargs)
94
85
95 def __str__(self):
86 def __str__(self):
96 return self.image.url
87 return self.image.url
97
88
98 def get_view(self):
89 def get_view(self):
99 metadata = '{}, {}'.format(self.image.name.split('.')[-1],
90 metadata = '{}, {}'.format(self.image.name.split('.')[-1],
100 filesizeformat(self.image.size))
91 filesizeformat(self.image.size))
101 return '<div class="{}">' \
92 return '<div class="{}">' \
102 '<a class="{}" href="{full}">' \
93 '<a class="{}" href="{full}">' \
103 '<img class="post-image-preview"' \
94 '<img class="post-image-preview"' \
104 ' src="{}"' \
95 ' src="{}"' \
105 ' alt="{}"' \
96 ' alt="{}"' \
106 ' width="{}"' \
97 ' width="{}"' \
107 ' height="{}"' \
98 ' height="{}"' \
108 ' data-width="{}"' \
99 ' data-width="{}"' \
109 ' data-height="{}" />' \
100 ' data-height="{}" />' \
110 '</a>' \
101 '</a>' \
111 '<div class="image-metadata">{image_meta}</div>' \
102 '<div class="image-metadata">{image_meta}</div>' \
112 '</div>'\
103 '</div>'\
113 .format(CSS_CLASS_IMAGE, CSS_CLASS_THUMB,
104 .format(CSS_CLASS_IMAGE, CSS_CLASS_THUMB,
114 self.image.url_200x150,
105 self.image.url_200x150,
115 str(self.hash), str(self.pre_width),
106 str(self.hash), str(self.pre_width),
116 str(self.pre_height), str(self.width), str(self.height),
107 str(self.pre_height), str(self.width), str(self.height),
117 full=self.image.url, image_meta=metadata)
108 full=self.image.url, image_meta=metadata)
118
109
119 def get_random_associated_post(self):
110 def get_random_associated_post(self):
120 posts = boards.models.Post.objects.filter(images__in=[self])
111 posts = boards.models.Post.objects.filter(images__in=[self])
121 return posts.order_by('?').first()
112 return posts.order_by('?').first()
@@ -1,84 +1,92 b''
1 """
1 """
2 This module contains helper functions and helper classes.
2 This module contains helper functions and helper classes.
3 """
3 """
4 import hashlib
4 import time
5 import time
5 import hmac
6 import hmac
6
7
7 from django.core.cache import cache
8 from django.core.cache import cache
8 from django.db.models import Model
9 from django.db.models import Model
9
10
10 from django.utils import timezone
11 from django.utils import timezone
11
12
12 from neboard import settings
13 from neboard import settings
13
14
14
15
15 CACHE_KEY_DELIMITER = '_'
16 CACHE_KEY_DELIMITER = '_'
16 PERMISSION_MODERATE = 'moderation'
17 PERMISSION_MODERATE = 'moderation'
17
18
18 def get_client_ip(request):
19 def get_client_ip(request):
19 x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR')
20 x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR')
20 if x_forwarded_for:
21 if x_forwarded_for:
21 ip = x_forwarded_for.split(',')[-1].strip()
22 ip = x_forwarded_for.split(',')[-1].strip()
22 else:
23 else:
23 ip = request.META.get('REMOTE_ADDR')
24 ip = request.META.get('REMOTE_ADDR')
24 return ip
25 return ip
25
26
26
27
27 # TODO The output format is not epoch because it includes microseconds
28 # TODO The output format is not epoch because it includes microseconds
28 def datetime_to_epoch(datetime):
29 def datetime_to_epoch(datetime):
29 return int(time.mktime(timezone.localtime(
30 return int(time.mktime(timezone.localtime(
30 datetime,timezone.get_current_timezone()).timetuple())
31 datetime,timezone.get_current_timezone()).timetuple())
31 * 1000000 + datetime.microsecond)
32 * 1000000 + datetime.microsecond)
32
33
33
34
34 def get_websocket_token(user_id='', timestamp=''):
35 def get_websocket_token(user_id='', timestamp=''):
35 """
36 """
36 Create token to validate information provided by new connection.
37 Create token to validate information provided by new connection.
37 """
38 """
38
39
39 sign = hmac.new(settings.CENTRIFUGE_PROJECT_SECRET.encode())
40 sign = hmac.new(settings.CENTRIFUGE_PROJECT_SECRET.encode())
40 sign.update(settings.CENTRIFUGE_PROJECT_ID.encode())
41 sign.update(settings.CENTRIFUGE_PROJECT_ID.encode())
41 sign.update(user_id.encode())
42 sign.update(user_id.encode())
42 sign.update(timestamp.encode())
43 sign.update(timestamp.encode())
43 token = sign.hexdigest()
44 token = sign.hexdigest()
44
45
45 return token
46 return token
46
47
47
48
48 def cached_result(key_method=None):
49 def cached_result(key_method=None):
49 """
50 """
50 Caches method result in Django's cache system, keyed by class name,
51 Caches method result in Django's cache system, keyed by class name,
51 method name and model id if object is a Django model.
52 method name and model id if object is a Django model.
52 """
53 """
53 def _cached_result(function):
54 def _cached_result(function):
54 def inner_func(obj, *args, **kwargs):
55 def inner_func(obj, *args, **kwargs):
55 # TODO Include method arguments to the cache key
56 # TODO Include method arguments to the cache key
56 cache_key_params = [obj.__class__.__name__, function.__name__]
57 cache_key_params = [obj.__class__.__name__, function.__name__]
57 if isinstance(obj, Model):
58 if isinstance(obj, Model):
58 cache_key_params.append(str(obj.id))
59 cache_key_params.append(str(obj.id))
59
60
60 if key_method is not None:
61 if key_method is not None:
61 cache_key_params += [str(arg) for arg in key_method(obj)]
62 cache_key_params += [str(arg) for arg in key_method(obj)]
62
63
63 cache_key = CACHE_KEY_DELIMITER.join(cache_key_params)
64 cache_key = CACHE_KEY_DELIMITER.join(cache_key_params)
64
65
65 persisted_result = cache.get(cache_key)
66 persisted_result = cache.get(cache_key)
66 if persisted_result is not None:
67 if persisted_result is not None:
67 result = persisted_result
68 result = persisted_result
68 else:
69 else:
69 result = function(obj, *args, **kwargs)
70 result = function(obj, *args, **kwargs)
70 cache.set(cache_key, result)
71 cache.set(cache_key, result)
71
72
72 return result
73 return result
73
74
74 return inner_func
75 return inner_func
75 return _cached_result
76 return _cached_result
76
77
77
78
78 def is_moderator(request):
79 def is_moderator(request):
79 try:
80 try:
80 moderate = request.user.has_perm(PERMISSION_MODERATE)
81 moderate = request.user.has_perm(PERMISSION_MODERATE)
81 except AttributeError:
82 except AttributeError:
82 moderate = False
83 moderate = False
83
84
84 return moderate
\ No newline at end of file
85 return moderate
86
87
88 def get_file_hash(file) -> str:
89 md5 = hashlib.md5()
90 for chunk in file.chunks():
91 md5.update(chunk)
92 return md5.hexdigest()
General Comments 0
You need to be logged in to leave comments. Login now