##// END OF EJS Templates
file-store: the file backend now uses directory distribution to optimize file storage.
marcink -
r3454:b3d726a5 default
parent child Browse files
Show More
@@ -1,175 +1,188 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2016-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import os
22 22 import time
23 23 import shutil
24 24
25 25 from rhodecode.lib.ext_json import json
26 26 from rhodecode.apps.file_store import utils
27 27 from rhodecode.apps.file_store.extensions import resolve_extensions
28 28 from rhodecode.apps.file_store.exceptions import FileNotAllowedException
29 29
# Version tag written into every ``.meta`` sidecar file under the "meta_ver" key.
METADATA_VER = 'v1'


class LocalFileStorage(object):
    """
    File storage backend that keeps uploads on the local filesystem.

    Files are distributed into sub-directories ("sub-stores") named after the
    first two characters of the file's base name, e.g.
    ``base_path/ab/abcdef-0.jpg``, so that a single directory does not
    accumulate every stored file.
    """

    @classmethod
    def resolve_name(cls, name, directory):
        """
        Resolves a unique name and the correct path. A numeric suffix is
        always appended to the base name, starting at 0, and incremented
        until the resulting path does not exist, for example
        test.jpg -> test-0.jpg, then test-1.jpg etc.

        The sub-store directory for the file is created if it is missing.

        :param name: base name of file
        :param directory: absolute directory path
        :returns: tuple of (resolved file name, absolute path of that file)
        """

        basename, ext = os.path.splitext(name)

        # sub_store prefix to optimize disk usage, e.g some_path/ab/final_file
        # It depends only on the basename, so it is resolved (and created)
        # once, before probing for a free counter value.
        sub_store = cls._sub_store_from_filename(basename)
        sub_store_path = os.path.join(directory, sub_store)
        cls._ensure_directory(sub_store_path)

        counter = 0
        while True:
            name = '%s-%d%s' % (basename, counter, ext)
            path = os.path.join(sub_store_path, name)
            if not os.path.exists(path):
                return name, path
            counter += 1

    @classmethod
    def _sub_store_from_filename(cls, filename):
        """
        Returns the sub-store directory name for a file: the first two
        characters of the given name (fewer if the name is shorter).
        """
        return filename[:2]

    @staticmethod
    def _ensure_directory(path):
        """
        Creates ``path`` (including parents) unless it already is a directory.

        An ``OSError`` from ``os.makedirs`` is ignored when the directory
        exists afterwards, which covers a concurrent caller creating it
        between the check and the call; any other failure is re-raised.
        """
        if os.path.isdir(path):
            return
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise

    def __init__(self, base_path, extension_groups=None):

        """
        Local file storage

        :param base_path: the absolute base path where uploads are stored
        :param extension_groups: extensions string
        """

        extension_groups = extension_groups or ['any']
        self.base_path = base_path
        self.extensions = resolve_extensions([], groups=extension_groups)

    def store_path(self, filename):
        """
        Returns absolute file path of the filename, joined to the
        base_path and the sub-store directory the file lives in.

        The sub-store is derived the same way `resolve_name` derives it:
        from the base name *without* the ``-<counter>`` suffix and without
        the extension. A leading relative directory (as returned by
        `save_file` when called with ``directory``) is kept in front of
        the sub-store.

        :param filename: stored name of file, optionally prefixed with the
            relative sub-directory it was saved under
        """
        directory, stored_name = os.path.split(filename)

        stem, _ = os.path.splitext(stored_name)
        # strip the numeric counter appended by resolve_name, so that short
        # names like "a-0.jpg" map to sub-store "a" and not "a-"
        base, sep, counter = stem.rpartition('-')
        if sep and counter.isdigit():
            stem = base

        sub_store = self._sub_store_from_filename(stem)
        return os.path.join(self.base_path, directory, sub_store, stored_name)

    def delete(self, filename):
        """
        Deletes the filename. Filename is resolved with the
        absolute path based on base_path. If file does not exist,
        returns **False**, otherwise **True**

        :param filename: base name of file
        """
        path = self.store_path(filename)
        try:
            os.remove(path)
        except OSError:
            # only a missing file is reported as False; a path that is
            # still present means the removal genuinely failed
            if os.path.exists(path):
                raise
            return False
        return True

    def exists(self, filename):
        """
        Checks if file exists. Resolves filename's absolute
        path based on base_path.

        :param filename: base name of file
        """
        return os.path.exists(self.store_path(filename))

    def filename_allowed(self, filename, extensions=None):
        """Checks if a filename has an allowed extension

        :param filename: base name of file
        :param extensions: iterable of extensions (or self.extensions)
        """
        _, ext = os.path.splitext(filename)
        return self.extension_allowed(ext, extensions)

    def extension_allowed(self, ext, extensions=None):
        """
        Checks if an extension is permitted. Both e.g. ".jpg" and
        "jpg" can be passed in. Extension lookup is case-insensitive.

        :param ext: extension to check
        :param extensions: iterable of extensions to validate against (or self.extensions)
        """

        extensions = extensions or self.extensions
        # an empty set of allowed extensions means no restriction at all
        if not extensions:
            return True
        if ext.startswith('.'):
            ext = ext[1:]
        return ext.lower() in extensions

    def save_file(self, file_obj, filename, directory=None, extensions=None,
                  metadata=None, **kwargs):
        """
        Saves a file object to the uploads location.
        Returns the resolved filename, i.e. the directory +
        the (randomized/incremented) base name.

        :param file_obj: **cgi.FieldStorage** object (or similar)
        :param filename: original filename
        :param directory: relative path of sub-directory
        :param extensions: iterable of allowed extensions, if not default
        :param metadata: JSON metadata to store next to the file with .meta suffix;
            the passed dict is updated in place with "size", "time" and "meta_ver"
        :raises FileNotAllowedException: if the extension of `filename` is not allowed
        :returns: tuple of (modified filename, metadata)
        """

        extensions = extensions or self.extensions

        if not self.filename_allowed(filename, extensions):
            raise FileNotAllowedException()

        if directory:
            dest_directory = os.path.join(self.base_path, directory)
        else:
            dest_directory = self.base_path

        self._ensure_directory(dest_directory)

        # the stored name is generated by utils.uid_filename and then made
        # unique inside its sub-store by resolve_name
        filename = utils.uid_filename(filename)
        filename, path = self.resolve_name(filename, dest_directory)

        file_obj.seek(0)

        with open(path, "wb") as dest:
            shutil.copyfileobj(file_obj, dest)

        if metadata:
            size = os.stat(path).st_size
            metadata.update(
                {"size": size,
                 "time": time.time(),
                 "meta_ver": METADATA_VER})

            # metadata lives next to the stored file, inside the same sub-store
            with open(path + '.meta', "wb") as dest_meta:
                dest_meta.write(json.dumps(metadata))

        if directory:
            filename = os.path.join(directory, filename)

        return filename, metadata
@@ -1,110 +1,111 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2010-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20 import os
21 21 import pytest
22 22
23 23 from rhodecode.lib.ext_json import json
24 24 from rhodecode.tests import TestController
25 25 from rhodecode.apps.file_store import utils, config_keys
26 26
27 27
def route_path(name, params=None, **kwargs):
    """
    Builds the URL path of a file-store route used by the tests.

    :param name: route name, one of ``upload_file`` or ``download_file``
    :param params: optional mapping of query parameters; when given it is
        url-encoded and appended after ``?``
    :param kwargs: values for the placeholders of the route pattern,
        e.g. ``fid`` for ``download_file``
    :raises KeyError: if `name` is not a known route
    :returns: the URL path as a string
    """
    # urlencode lives in different modules on Python 2 and Python 3
    try:
        from urllib import urlencode
    except ImportError:
        from urllib.parse import urlencode

    base_url = {
        'upload_file': '/_file_store/upload',
        'download_file': '/_file_store/download/{fid}',

    }[name].format(**kwargs)

    if params:
        base_url = '{}?{}'.format(base_url, urlencode(params))
    return base_url
40 40
41 41
class TestFileStoreViews(TestController):
    """Functional tests for the file-store upload and download views."""

    @pytest.mark.parametrize("fid, content, exists", [
        ('abcde-0.jpg', "xxxxx", True),
        ('abcde-0.exe', "1234567", True),
        ('abcde-0.jpg', "xxxxx", False),
    ])
    def test_get_files_from_store(self, fid, content, exists, tmpdir):
        """
        Downloading a stored file returns its content and leaves a readable
        ``.meta`` file next to it; an unknown file id gives a 404.
        """
        self.log_user()
        store_path = self.app._pyramid_settings[config_keys.store_path]

        if exists:
            status = 200
            store = utils.get_file_storage({config_keys.store_path: store_path})
            filesystem_file = os.path.join(str(tmpdir), fid)
            with open(filesystem_file, 'wb') as f:
                f.write(content)

            # from here on `fid` is the name the store assigned to the file
            with open(filesystem_file, 'rb') as f:
                fid, metadata = store.save_file(f, fid, metadata={'filename': fid})

        else:
            status = 404

        response = self.app.get(route_path('download_file', fid=fid), status=status)

        if exists:
            assert response.text == content
            # The metadata is written next to the stored file. Look it up
            # through the store's read-only path resolution instead of
            # resolve_name(), which allocates a new name and creates
            # directories as a side effect.
            # NOTE(review): assumes the storage returned by
            # utils.get_file_storage uses `store_path` as its base_path.
            metadata_file = store.store_path(fid) + '.meta'
            assert os.path.exists(metadata_file)
            with open(metadata_file, 'rb') as f:
                json_data = json.loads(f.read())

            assert json_data
            assert 'size' in json_data

    def test_upload_files_without_content_to_store(self):
        """Posting without the ``store_file`` field reports a missing-field error."""
        self.log_user()
        response = self.app.post(
            route_path('upload_file'),
            params={'csrf_token': self.csrf_token},
            status=200)

        assert response.json == {
            u'error': u'store_file data field is missing',
            u'access_path': None,
            u'store_fid': None}

    def test_upload_files_bogus_content_to_store(self):
        """Posting a plain string as ``store_file`` reports an unreadable filename."""
        self.log_user()
        response = self.app.post(
            route_path('upload_file'),
            params={'csrf_token': self.csrf_token, 'store_file': 'bogus'},
            status=200)

        assert response.json == {
            u'error': u'filename cannot be read from the data field',
            u'access_path': None,
            u'store_fid': None}

    def test_upload_content_to_store(self):
        """Uploading a real file returns a non-empty ``store_fid``."""
        self.log_user()
        response = self.app.post(
            route_path('upload_file'),
            upload_files=[('store_file', 'myfile.txt', 'SOME CONTENT')],
            params={'csrf_token': self.csrf_token},
            status=200)

        assert response.json['store_fid']
General Comments 0
You need to be logged in to leave comments. Login now