##// END OF EJS Templates
file-store: always calculate sha256 and metadata.
marcink -
r3455:4171fa2a default
parent child Browse files
Show More
@@ -1,188 +1,211 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2016-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import os
22 22 import time
23 23 import shutil
24 import hashlib
24 25
25 26 from rhodecode.lib.ext_json import json
26 27 from rhodecode.apps.file_store import utils
27 28 from rhodecode.apps.file_store.extensions import resolve_extensions
28 29 from rhodecode.apps.file_store.exceptions import FileNotAllowedException
29 30
30 31 METADATA_VER = 'v1'
31 32
32 33
33 34 class LocalFileStorage(object):
34 35
35 36 @classmethod
36 37 def resolve_name(cls, name, directory):
37 38 """
38 39 Resolves a unique name and the correct path. If a filename
39 40 for that path already exists then a numeric prefix with values > 0 will be
40 41 added, for example test.jpg -> test-1.jpg etc. initially file would have 0 prefix.
41 42
42 43 :param name: base name of file
43 44 :param directory: absolute directory path
44 45 """
45 46
46 47 basename, ext = os.path.splitext(name)
47 48 counter = 0
48 49 while True:
49 50 name = '%s-%d%s' % (basename, counter, ext)
50 51
51 52 # sub_store prefix to optimize disk usage, e.g some_path/ab/final_file
52 53 sub_store = cls._sub_store_from_filename(basename)
53 54 sub_store_path = os.path.join(directory, sub_store)
54 55 if not os.path.exists(sub_store_path):
55 56 os.makedirs(sub_store_path)
56 57
57 58 path = os.path.join(sub_store_path, name)
58 59 if not os.path.exists(path):
59 60 return name, path
60 61 counter += 1
61 62
62 63 @classmethod
63 64 def _sub_store_from_filename(cls, filename):
64 65 return filename[:2]
65 66
67 @classmethod
68 def calculate_path_hash(cls, file_path):
69 """
70 Efficient calculation of file_path sha256 sum
71
72 :param file_path:
73 :return: sha256sum
74 """
75 digest = hashlib.sha256()
76 with open(file_path, 'rb') as f:
77 for chunk in iter(lambda: f.read(1024 * 100), b""):
78 digest.update(chunk)
79
80 return digest.hexdigest()
81
66 82 def __init__(self, base_path, extension_groups=None):
67 83
68 84 """
69 85 Local file storage
70 86
71 87 :param base_path: the absolute base path where uploads are stored
72 88 :param extension_groups: extensions string
73 89 """
74 90
75 91 extension_groups = extension_groups or ['any']
76 92 self.base_path = base_path
77 93 self.extensions = resolve_extensions([], groups=extension_groups)
78 94
79 95 def store_path(self, filename):
80 96 """
81 97 Returns absolute file path of the filename, joined to the
82 98 base_path.
83 99
84 100 :param filename: base name of file
85 101 """
86 102 sub_store = self._sub_store_from_filename(filename)
87 103 return os.path.join(self.base_path, sub_store, filename)
88 104
89 105 def delete(self, filename):
90 106 """
91 107 Deletes the filename. Filename is resolved with the
92 108 absolute path based on base_path. If file does not exist,
93 109 returns **False**, otherwise **True**
94 110
95 111 :param filename: base name of file
96 112 """
97 113 if self.exists(filename):
98 114 os.remove(self.store_path(filename))
99 115 return True
100 116 return False
101 117
102 118 def exists(self, filename):
103 119 """
104 120 Checks if file exists. Resolves filename's absolute
105 121 path based on base_path.
106 122
107 123 :param filename: base name of file
108 124 """
109 125 return os.path.exists(self.store_path(filename))
110 126
111 127 def filename_allowed(self, filename, extensions=None):
112 128 """Checks if a filename has an allowed extension
113 129
114 130 :param filename: base name of file
115 131 :param extensions: iterable of extensions (or self.extensions)
116 132 """
117 133 _, ext = os.path.splitext(filename)
118 134 return self.extension_allowed(ext, extensions)
119 135
120 136 def extension_allowed(self, ext, extensions=None):
121 137 """
122 138 Checks if an extension is permitted. Both e.g. ".jpg" and
123 139 "jpg" can be passed in. Extension lookup is case-insensitive.
124 140
125 141 :param ext: extension to check
126 142 :param extensions: iterable of extensions to validate against (or self.extensions)
127 143 """
128 144
129 145 extensions = extensions or self.extensions
130 146 if not extensions:
131 147 return True
132 148 if ext.startswith('.'):
133 149 ext = ext[1:]
134 150 return ext.lower() in extensions
135 151
136 152 def save_file(self, file_obj, filename, directory=None, extensions=None,
137 metadata=None, **kwargs):
153 extra_metadata=None, **kwargs):
138 154 """
139 155 Saves a file object to the uploads location.
140 156 Returns the resolved filename, i.e. the directory +
141 157 the (randomized/incremented) base name.
142 158
143 159 :param file_obj: **cgi.FieldStorage** object (or similar)
144 160 :param filename: original filename
145 161 :param directory: relative path of sub-directory
146 162 :param extensions: iterable of allowed extensions, if not default
147 :param metadata: JSON metadata to store next to the file with .meta suffix
148 :returns: modified filename
163 :param extra_metadata: extra JSON metadata to store next to the file with .meta suffix
149 164 """
150 165
151 166 extensions = extensions or self.extensions
152 167
153 168 if not self.filename_allowed(filename, extensions):
154 169 raise FileNotAllowedException()
155 170
156 171 if directory:
157 172 dest_directory = os.path.join(self.base_path, directory)
158 173 else:
159 174 dest_directory = self.base_path
160 175
161 176 if not os.path.exists(dest_directory):
162 177 os.makedirs(dest_directory)
163 178
164 179 filename = utils.uid_filename(filename)
165 180
181 # resolve also produces special sub-dir for file optimized store
166 182 filename, path = self.resolve_name(filename, dest_directory)
183 stored_file_dir = os.path.dirname(path)
167 184
168 185 file_obj.seek(0)
169 186
170 187 with open(path, "wb") as dest:
171 188 shutil.copyfileobj(file_obj, dest)
172 189
173 if metadata:
174 size = os.stat(path).st_size
175 metadata.update(
176 {"size": size,
177 "time": time.time(),
178 "meta_ver": METADATA_VER})
190 metadata = {}
191 if extra_metadata:
192 metadata = extra_metadata
193
194 size = os.stat(path).st_size
195 file_hash = self.calculate_path_hash(path)
179 196
180 stored_file_path = os.path.dirname(path)
181 filename_meta = filename + '.meta'
182 with open(os.path.join(stored_file_path, filename_meta), "wb") as dest_meta:
183 dest_meta.write(json.dumps(metadata))
197 metadata.update(
198 {"filename": filename,
199 "size": size,
200 "time": time.time(),
201 "sha256": file_hash,
202 "meta_ver": METADATA_VER})
203
204 filename_meta = filename + '.meta'
205 with open(os.path.join(stored_file_dir, filename_meta), "wb") as dest_meta:
206 dest_meta.write(json.dumps(metadata))
184 207
185 208 if directory:
186 209 filename = os.path.join(directory, filename)
187 210
188 211 return filename, metadata
@@ -1,111 +1,111 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2010-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20 import os
21 21 import pytest
22 22
23 23 from rhodecode.lib.ext_json import json
24 24 from rhodecode.tests import TestController
25 25 from rhodecode.apps.file_store import utils, config_keys
26 26
27 27
28 28 def route_path(name, params=None, **kwargs):
29 29 import urllib
30 30
31 31 base_url = {
32 32 'upload_file': '/_file_store/upload',
33 33 'download_file': '/_file_store/download/{fid}',
34 34
35 35 }[name].format(**kwargs)
36 36
37 37 if params:
38 38 base_url = '{}?{}'.format(base_url, urllib.urlencode(params))
39 39 return base_url
40 40
41 41
42 42 class TestFileStoreViews(TestController):
43 43
44 44 @pytest.mark.parametrize("fid, content, exists", [
45 45 ('abcde-0.jpg', "xxxxx", True),
46 46 ('abcde-0.exe', "1234567", True),
47 47 ('abcde-0.jpg', "xxxxx", False),
48 48 ])
49 49 def test_get_files_from_store(self, fid, content, exists, tmpdir):
50 50 self.log_user()
51 51 store_path = self.app._pyramid_settings[config_keys.store_path]
52 52
53 53 if exists:
54 54 status = 200
55 55 store = utils.get_file_storage({config_keys.store_path: store_path})
56 56 filesystem_file = os.path.join(str(tmpdir), fid)
57 57 with open(filesystem_file, 'wb') as f:
58 58 f.write(content)
59 59
60 60 with open(filesystem_file, 'rb') as f:
61 fid, metadata = store.save_file(f, fid, metadata={'filename': fid})
61 fid, metadata = store.save_file(f, fid, extra_metadata={'filename': fid})
62 62
63 63 else:
64 64 status = 404
65 65
66 66 response = self.app.get(route_path('download_file', fid=fid), status=status)
67 67
68 68 if exists:
69 69 assert response.text == content
70 70 file_store_path = os.path.dirname(store.resolve_name(fid, store_path)[1])
71 71 metadata_file = os.path.join(file_store_path, fid + '.meta')
72 72 assert os.path.exists(metadata_file)
73 73 with open(metadata_file, 'rb') as f:
74 74 json_data = json.loads(f.read())
75 75
76 76 assert json_data
77 77 assert 'size' in json_data
78 78
79 79 def test_upload_files_without_content_to_store(self):
80 80 self.log_user()
81 81 response = self.app.post(
82 82 route_path('upload_file'),
83 83 params={'csrf_token': self.csrf_token},
84 84 status=200)
85 85
86 86 assert response.json == {
87 87 u'error': u'store_file data field is missing',
88 88 u'access_path': None,
89 89 u'store_fid': None}
90 90
91 91 def test_upload_files_bogus_content_to_store(self):
92 92 self.log_user()
93 93 response = self.app.post(
94 94 route_path('upload_file'),
95 95 params={'csrf_token': self.csrf_token, 'store_file': 'bogus'},
96 96 status=200)
97 97
98 98 assert response.json == {
99 99 u'error': u'filename cannot be read from the data field',
100 100 u'access_path': None,
101 101 u'store_fid': None}
102 102
103 103 def test_upload_content_to_store(self):
104 104 self.log_user()
105 105 response = self.app.post(
106 106 route_path('upload_file'),
107 107 upload_files=[('store_file', 'myfile.txt', 'SOME CONTENT')],
108 108 params={'csrf_token': self.csrf_token},
109 109 status=200)
110 110
111 111 assert response.json['store_fid']
@@ -1,96 +1,95 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2016-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20 import logging
21 21
22 22 from pyramid.view import view_config
23 23 from pyramid.response import FileResponse
24 24 from pyramid.httpexceptions import HTTPFound, HTTPNotFound
25 25
26 26 from rhodecode.apps._base import BaseAppView
27 27 from rhodecode.apps.file_store import utils
28 28 from rhodecode.apps.file_store.exceptions import (
29 29 FileNotAllowedException,FileOverSizeException)
30 30
31 31 from rhodecode.lib import helpers as h
32 32 from rhodecode.lib import audit_logger
33 33 from rhodecode.lib.auth import (CSRFRequired, NotAnonymous)
34 34
35 35 log = logging.getLogger(__name__)
36 36
37 37
38 38 class FileStoreView(BaseAppView):
39 39 upload_key = 'store_file'
40 40
41 41 def load_default_context(self):
42 42 c = self._get_local_tmpl_context()
43 43 self.storage = utils.get_file_storage(self.request.registry.settings)
44 44 return c
45 45
46 46 @NotAnonymous()
47 47 @CSRFRequired()
48 48 @view_config(route_name='upload_file', request_method='POST', renderer='json_ext')
49 49 def upload_file(self):
50 50 self.load_default_context()
51 51 file_obj = self.request.POST.get(self.upload_key)
52 52
53 53 if file_obj is None:
54 54 return {'store_fid': None,
55 55 'access_path': None,
56 56 'error': '{} data field is missing'.format(self.upload_key)}
57 57
58 58 if not hasattr(file_obj, 'filename'):
59 59 return {'store_fid': None,
60 60 'access_path': None,
61 61 'error': 'filename cannot be read from the data field'}
62 62
63 63 filename = file_obj.filename
64 64
65 65 metadata = {
66 'filename': filename,
67 'size': '', # filled by save_file
68 66 'user_uploaded': {'username': self._rhodecode_user.username,
69 67 'user_id': self._rhodecode_user.user_id,
70 68 'ip': self._rhodecode_user.ip_addr}}
71 69 try:
72 store_fid, metadata = self.storage.save_file(file_obj.file, filename, metadata=metadata)
70 store_fid, metadata = self.storage.save_file(
71 file_obj.file, filename, extra_metadata=metadata)
73 72 except FileNotAllowedException:
74 73 return {'store_fid': None,
75 74 'access_path': None,
76 75 'error': 'File {} is not allowed.'.format(filename)}
77 76
78 77 except FileOverSizeException:
79 78 return {'store_fid': None,
80 79 'access_path': None,
81 80 'error': 'File {} is exceeding allowed limit.'.format(filename)}
82 81
83 82 return {'store_fid': store_fid,
84 83 'access_path': h.route_path('download_file', fid=store_fid)}
85 84
86 85 @view_config(route_name='download_file')
87 86 def download_file(self):
88 87 self.load_default_context()
89 88 file_uid = self.request.matchdict['fid']
90 89 log.debug('Requesting FID:%s from store %s', file_uid, self.storage)
91 90 if not self.storage.exists(file_uid):
92 91 log.debug('File with FID:%s not found in the store', file_uid)
93 92 raise HTTPNotFound()
94 93
95 94 file_path = self.storage.store_path(file_uid)
96 95 return FileResponse(file_path)
General Comments 0
You need to be logged in to leave comments. Login now