##// END OF EJS Templates
file-store: the file backend now uses directory distribution to optimize file storage.
marcink -
r3454:b3d726a5 default
parent child Browse files
Show More
@@ -1,175 +1,188 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2016-2019 RhodeCode GmbH
3 # Copyright (C) 2016-2019 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import os
21 import os
22 import time
22 import time
23 import shutil
23 import shutil
24
24
25 from rhodecode.lib.ext_json import json
25 from rhodecode.lib.ext_json import json
26 from rhodecode.apps.file_store import utils
26 from rhodecode.apps.file_store import utils
27 from rhodecode.apps.file_store.extensions import resolve_extensions
27 from rhodecode.apps.file_store.extensions import resolve_extensions
28 from rhodecode.apps.file_store.exceptions import FileNotAllowedException
28 from rhodecode.apps.file_store.exceptions import FileNotAllowedException
29
29
30 METADATA_VER = 'v1'
30 METADATA_VER = 'v1'
31
31
32
32
33 class LocalFileStorage(object):
33 class LocalFileStorage(object):
34
34
35 @classmethod
35 @classmethod
36 def resolve_name(cls, name, directory):
36 def resolve_name(cls, name, directory):
37 """
37 """
38 Resolves a unique name and the correct path. If a filename
38 Resolves a unique name and the correct path. If a filename
39 for that path already exists then a numeric prefix with values > 0 will be
39 for that path already exists then a numeric prefix with values > 0 will be
40 added, for example test.jpg -> test-1.jpg etc. initially file would have 0 prefix.
40 added, for example test.jpg -> test-1.jpg etc. initially file would have 0 prefix.
41
41
42 :param name: base name of file
42 :param name: base name of file
43 :param directory: absolute directory path
43 :param directory: absolute directory path
44 """
44 """
45
45
46 basename, ext = os.path.splitext(name)
46 basename, ext = os.path.splitext(name)
47 counter = 0
47 counter = 0
48 while True:
48 while True:
49 name = '%s-%d%s' % (basename, counter, ext)
49 name = '%s-%d%s' % (basename, counter, ext)
50 path = os.path.join(directory, name)
50
51 # sub_store prefix to optimize disk usage, e.g some_path/ab/final_file
52 sub_store = cls._sub_store_from_filename(basename)
53 sub_store_path = os.path.join(directory, sub_store)
54 if not os.path.exists(sub_store_path):
55 os.makedirs(sub_store_path)
56
57 path = os.path.join(sub_store_path, name)
51 if not os.path.exists(path):
58 if not os.path.exists(path):
52 return name, path
59 return name, path
53 counter += 1
60 counter += 1
54
61
62 @classmethod
63 def _sub_store_from_filename(cls, filename):
64 return filename[:2]
65
55 def __init__(self, base_path, extension_groups=None):
66 def __init__(self, base_path, extension_groups=None):
56
67
57 """
68 """
58 Local file storage
69 Local file storage
59
70
60 :param base_path: the absolute base path where uploads are stored
71 :param base_path: the absolute base path where uploads are stored
61 :param extension_groups: extensions string
72 :param extension_groups: extensions string
62 """
73 """
63
74
64 extension_groups = extension_groups or ['any']
75 extension_groups = extension_groups or ['any']
65 self.base_path = base_path
76 self.base_path = base_path
66 self.extensions = resolve_extensions([], groups=extension_groups)
77 self.extensions = resolve_extensions([], groups=extension_groups)
67
78
68 def store_path(self, filename):
79 def store_path(self, filename):
69 """
80 """
70 Returns absolute file path of the filename, joined to the
81 Returns absolute file path of the filename, joined to the
71 base_path.
82 base_path.
72
83
73 :param filename: base name of file
84 :param filename: base name of file
74 """
85 """
75 return os.path.join(self.base_path, filename)
86 sub_store = self._sub_store_from_filename(filename)
87 return os.path.join(self.base_path, sub_store, filename)
76
88
77 def delete(self, filename):
89 def delete(self, filename):
78 """
90 """
79 Deletes the filename. Filename is resolved with the
91 Deletes the filename. Filename is resolved with the
80 absolute path based on base_path. If file does not exist,
92 absolute path based on base_path. If file does not exist,
81 returns **False**, otherwise **True**
93 returns **False**, otherwise **True**
82
94
83 :param filename: base name of file
95 :param filename: base name of file
84 """
96 """
85 if self.exists(filename):
97 if self.exists(filename):
86 os.remove(self.store_path(filename))
98 os.remove(self.store_path(filename))
87 return True
99 return True
88 return False
100 return False
89
101
90 def exists(self, filename):
102 def exists(self, filename):
91 """
103 """
92 Checks if file exists. Resolves filename's absolute
104 Checks if file exists. Resolves filename's absolute
93 path based on base_path.
105 path based on base_path.
94
106
95 :param filename: base name of file
107 :param filename: base name of file
96 """
108 """
97 return os.path.exists(self.store_path(filename))
109 return os.path.exists(self.store_path(filename))
98
110
99 def filename_allowed(self, filename, extensions=None):
111 def filename_allowed(self, filename, extensions=None):
100 """Checks if a filename has an allowed extension
112 """Checks if a filename has an allowed extension
101
113
102 :param filename: base name of file
114 :param filename: base name of file
103 :param extensions: iterable of extensions (or self.extensions)
115 :param extensions: iterable of extensions (or self.extensions)
104 """
116 """
105 _, ext = os.path.splitext(filename)
117 _, ext = os.path.splitext(filename)
106 return self.extension_allowed(ext, extensions)
118 return self.extension_allowed(ext, extensions)
107
119
108 def extension_allowed(self, ext, extensions=None):
120 def extension_allowed(self, ext, extensions=None):
109 """
121 """
110 Checks if an extension is permitted. Both e.g. ".jpg" and
122 Checks if an extension is permitted. Both e.g. ".jpg" and
111 "jpg" can be passed in. Extension lookup is case-insensitive.
123 "jpg" can be passed in. Extension lookup is case-insensitive.
112
124
113 :param ext: extension to check
125 :param ext: extension to check
114 :param extensions: iterable of extensions to validate against (or self.extensions)
126 :param extensions: iterable of extensions to validate against (or self.extensions)
115 """
127 """
116
128
117 extensions = extensions or self.extensions
129 extensions = extensions or self.extensions
118 if not extensions:
130 if not extensions:
119 return True
131 return True
120 if ext.startswith('.'):
132 if ext.startswith('.'):
121 ext = ext[1:]
133 ext = ext[1:]
122 return ext.lower() in extensions
134 return ext.lower() in extensions
123
135
124 def save_file(self, file_obj, filename, directory=None, extensions=None,
136 def save_file(self, file_obj, filename, directory=None, extensions=None,
125 metadata=None, **kwargs):
137 metadata=None, **kwargs):
126 """
138 """
127 Saves a file object to the uploads location.
139 Saves a file object to the uploads location.
128 Returns the resolved filename, i.e. the directory +
140 Returns the resolved filename, i.e. the directory +
129 the (randomized/incremented) base name.
141 the (randomized/incremented) base name.
130
142
131 :param file_obj: **cgi.FieldStorage** object (or similar)
143 :param file_obj: **cgi.FieldStorage** object (or similar)
132 :param filename: original filename
144 :param filename: original filename
133 :param directory: relative path of sub-directory
145 :param directory: relative path of sub-directory
134 :param extensions: iterable of allowed extensions, if not default
146 :param extensions: iterable of allowed extensions, if not default
135 :param metadata: JSON metadata to store next to the file with .meta suffix
147 :param metadata: JSON metadata to store next to the file with .meta suffix
136 :returns: modified filename
148 :returns: modified filename
137 """
149 """
138
150
139 extensions = extensions or self.extensions
151 extensions = extensions or self.extensions
140
152
141 if not self.filename_allowed(filename, extensions):
153 if not self.filename_allowed(filename, extensions):
142 raise FileNotAllowedException()
154 raise FileNotAllowedException()
143
155
144 if directory:
156 if directory:
145 dest_directory = os.path.join(self.base_path, directory)
157 dest_directory = os.path.join(self.base_path, directory)
146 else:
158 else:
147 dest_directory = self.base_path
159 dest_directory = self.base_path
148
160
149 if not os.path.exists(dest_directory):
161 if not os.path.exists(dest_directory):
150 os.makedirs(dest_directory)
162 os.makedirs(dest_directory)
151
163
152 filename = utils.uid_filename(filename)
164 filename = utils.uid_filename(filename)
153
165
154 filename, path = self.resolve_name(filename, dest_directory)
166 filename, path = self.resolve_name(filename, dest_directory)
155 filename_meta = filename + '.meta'
156
167
157 file_obj.seek(0)
168 file_obj.seek(0)
158
169
159 with open(path, "wb") as dest:
170 with open(path, "wb") as dest:
160 shutil.copyfileobj(file_obj, dest)
171 shutil.copyfileobj(file_obj, dest)
161
172
162 if metadata:
173 if metadata:
163 size = os.stat(path).st_size
174 size = os.stat(path).st_size
164 metadata.update(
175 metadata.update(
165 {"size": size,
176 {"size": size,
166 "time": time.time(),
177 "time": time.time(),
167 "meta_ver": METADATA_VER})
178 "meta_ver": METADATA_VER})
168
179
169 with open(os.path.join(dest_directory, filename_meta), "wb") as dest_meta:
180 stored_file_path = os.path.dirname(path)
181 filename_meta = filename + '.meta'
182 with open(os.path.join(stored_file_path, filename_meta), "wb") as dest_meta:
170 dest_meta.write(json.dumps(metadata))
183 dest_meta.write(json.dumps(metadata))
171
184
172 if directory:
185 if directory:
173 filename = os.path.join(directory, filename)
186 filename = os.path.join(directory, filename)
174
187
175 return filename, metadata
188 return filename, metadata
@@ -1,110 +1,111 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2010-2019 RhodeCode GmbH
3 # Copyright (C) 2010-2019 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 import os
20 import os
21 import pytest
21 import pytest
22
22
23 from rhodecode.lib.ext_json import json
23 from rhodecode.lib.ext_json import json
24 from rhodecode.tests import TestController
24 from rhodecode.tests import TestController
25 from rhodecode.apps.file_store import utils, config_keys
25 from rhodecode.apps.file_store import utils, config_keys
26
26
27
27
28 def route_path(name, params=None, **kwargs):
28 def route_path(name, params=None, **kwargs):
29 import urllib
29 import urllib
30
30
31 base_url = {
31 base_url = {
32 'upload_file': '/_file_store/upload',
32 'upload_file': '/_file_store/upload',
33 'download_file': '/_file_store/download/{fid}',
33 'download_file': '/_file_store/download/{fid}',
34
34
35 }[name].format(**kwargs)
35 }[name].format(**kwargs)
36
36
37 if params:
37 if params:
38 base_url = '{}?{}'.format(base_url, urllib.urlencode(params))
38 base_url = '{}?{}'.format(base_url, urllib.urlencode(params))
39 return base_url
39 return base_url
40
40
41
41
42 class TestFileStoreViews(TestController):
42 class TestFileStoreViews(TestController):
43
43
44 @pytest.mark.parametrize("fid, content, exists", [
44 @pytest.mark.parametrize("fid, content, exists", [
45 ('abcde-0.jpg', "xxxxx", True),
45 ('abcde-0.jpg', "xxxxx", True),
46 ('abcde-0.exe', "1234567", True),
46 ('abcde-0.exe', "1234567", True),
47 ('abcde-0.jpg', "xxxxx", False),
47 ('abcde-0.jpg', "xxxxx", False),
48 ])
48 ])
49 def test_get_files_from_store(self, fid, content, exists, tmpdir):
49 def test_get_files_from_store(self, fid, content, exists, tmpdir):
50 self.log_user()
50 self.log_user()
51 store_path = self.app._pyramid_settings[config_keys.store_path]
51 store_path = self.app._pyramid_settings[config_keys.store_path]
52
52
53 if exists:
53 if exists:
54 status = 200
54 status = 200
55 store = utils.get_file_storage({config_keys.store_path: store_path})
55 store = utils.get_file_storage({config_keys.store_path: store_path})
56 filesystem_file = os.path.join(str(tmpdir), fid)
56 filesystem_file = os.path.join(str(tmpdir), fid)
57 with open(filesystem_file, 'wb') as f:
57 with open(filesystem_file, 'wb') as f:
58 f.write(content)
58 f.write(content)
59
59
60 with open(filesystem_file, 'rb') as f:
60 with open(filesystem_file, 'rb') as f:
61 fid, metadata = store.save_file(f, fid, metadata={'filename': fid})
61 fid, metadata = store.save_file(f, fid, metadata={'filename': fid})
62
62
63 else:
63 else:
64 status = 404
64 status = 404
65
65
66 response = self.app.get(route_path('download_file', fid=fid), status=status)
66 response = self.app.get(route_path('download_file', fid=fid), status=status)
67
67
68 if exists:
68 if exists:
69 assert response.text == content
69 assert response.text == content
70 metadata = os.path.join(store_path, fid + '.meta')
70 file_store_path = os.path.dirname(store.resolve_name(fid, store_path)[1])
71 assert os.path.exists(metadata)
71 metadata_file = os.path.join(file_store_path, fid + '.meta')
72 with open(metadata, 'rb') as f:
72 assert os.path.exists(metadata_file)
73 with open(metadata_file, 'rb') as f:
73 json_data = json.loads(f.read())
74 json_data = json.loads(f.read())
74
75
75 assert json_data
76 assert json_data
76 assert 'size' in json_data
77 assert 'size' in json_data
77
78
78 def test_upload_files_without_content_to_store(self):
79 def test_upload_files_without_content_to_store(self):
79 self.log_user()
80 self.log_user()
80 response = self.app.post(
81 response = self.app.post(
81 route_path('upload_file'),
82 route_path('upload_file'),
82 params={'csrf_token': self.csrf_token},
83 params={'csrf_token': self.csrf_token},
83 status=200)
84 status=200)
84
85
85 assert response.json == {
86 assert response.json == {
86 u'error': u'store_file data field is missing',
87 u'error': u'store_file data field is missing',
87 u'access_path': None,
88 u'access_path': None,
88 u'store_fid': None}
89 u'store_fid': None}
89
90
90 def test_upload_files_bogus_content_to_store(self):
91 def test_upload_files_bogus_content_to_store(self):
91 self.log_user()
92 self.log_user()
92 response = self.app.post(
93 response = self.app.post(
93 route_path('upload_file'),
94 route_path('upload_file'),
94 params={'csrf_token': self.csrf_token, 'store_file': 'bogus'},
95 params={'csrf_token': self.csrf_token, 'store_file': 'bogus'},
95 status=200)
96 status=200)
96
97
97 assert response.json == {
98 assert response.json == {
98 u'error': u'filename cannot be read from the data field',
99 u'error': u'filename cannot be read from the data field',
99 u'access_path': None,
100 u'access_path': None,
100 u'store_fid': None}
101 u'store_fid': None}
101
102
102 def test_upload_content_to_store(self):
103 def test_upload_content_to_store(self):
103 self.log_user()
104 self.log_user()
104 response = self.app.post(
105 response = self.app.post(
105 route_path('upload_file'),
106 route_path('upload_file'),
106 upload_files=[('store_file', 'myfile.txt', 'SOME CONTENT')],
107 upload_files=[('store_file', 'myfile.txt', 'SOME CONTENT')],
107 params={'csrf_token': self.csrf_token},
108 params={'csrf_token': self.csrf_token},
108 status=200)
109 status=200)
109
110
110 assert response.json['store_fid']
111 assert response.json['store_fid']
General Comments 0
You need to be logged in to leave comments. Login now