##// END OF EJS Templates
file-store: use pathlib2 to determine complex filenames with double extensions, e.g. example.tar.gz
marcink -
r3728:20990e78 new-ui
parent child Browse files
Show More
@@ -1,224 +1,223 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2016-2019 RhodeCode GmbH
3 # Copyright (C) 2016-2019 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 import os
21 import os
22 import time
22 import time
23 import shutil
23 import shutil
24 import hashlib
24 import hashlib
25
25
26 from rhodecode.lib.ext_json import json
26 from rhodecode.lib.ext_json import json
27 from rhodecode.apps.file_store import utils
27 from rhodecode.apps.file_store import utils
28 from rhodecode.apps.file_store.extensions import resolve_extensions
28 from rhodecode.apps.file_store.extensions import resolve_extensions
29 from rhodecode.apps.file_store.exceptions import FileNotAllowedException
29 from rhodecode.apps.file_store.exceptions import FileNotAllowedException
30
30
31 METADATA_VER = 'v1'
31 METADATA_VER = 'v1'
32
32
33
33
34 class LocalFileStorage(object):
34 class LocalFileStorage(object):
35
35
36 @classmethod
36 @classmethod
37 def resolve_name(cls, name, directory):
37 def resolve_name(cls, name, directory):
38 """
38 """
39 Resolves a unique name and the correct path. If a filename
39 Resolves a unique name and the correct path. If a filename
40 for that path already exists then a numeric prefix with values > 0 will be
40 for that path already exists then a numeric prefix with values > 0 will be
41 added, for example test.jpg -> test-1.jpg etc. initially file would have 0 prefix.
41 added, for example test.jpg -> 1-test.jpg etc. initially file would have 0 prefix.
42
42
43 :param name: base name of file
43 :param name: base name of file
44 :param directory: absolute directory path
44 :param directory: absolute directory path
45 """
45 """
46
46
47 basename, ext = os.path.splitext(name)
48 counter = 0
47 counter = 0
49 while True:
48 while True:
50 name = '%s-%d%s' % (basename, counter, ext)
49 name = '%d-%s' % (counter, name)
51
50
52 # sub_store prefix to optimize disk usage, e.g some_path/ab/final_file
51 # sub_store prefix to optimize disk usage, e.g some_path/ab/final_file
53 sub_store = cls._sub_store_from_filename(basename)
52 sub_store = cls._sub_store_from_filename(name)
54 sub_store_path = os.path.join(directory, sub_store)
53 sub_store_path = os.path.join(directory, sub_store)
55 if not os.path.exists(sub_store_path):
54 if not os.path.exists(sub_store_path):
56 os.makedirs(sub_store_path)
55 os.makedirs(sub_store_path)
57
56
58 path = os.path.join(sub_store_path, name)
57 path = os.path.join(sub_store_path, name)
59 if not os.path.exists(path):
58 if not os.path.exists(path):
60 return name, path
59 return name, path
61 counter += 1
60 counter += 1
62
61
63 @classmethod
62 @classmethod
64 def _sub_store_from_filename(cls, filename):
63 def _sub_store_from_filename(cls, filename):
65 return filename[:2]
64 return filename[:2]
66
65
67 @classmethod
66 @classmethod
68 def calculate_path_hash(cls, file_path):
67 def calculate_path_hash(cls, file_path):
69 """
68 """
70 Efficient calculation of file_path sha256 sum
69 Efficient calculation of file_path sha256 sum
71
70
72 :param file_path:
71 :param file_path:
73 :return: sha256sum
72 :return: sha256sum
74 """
73 """
75 digest = hashlib.sha256()
74 digest = hashlib.sha256()
76 with open(file_path, 'rb') as f:
75 with open(file_path, 'rb') as f:
77 for chunk in iter(lambda: f.read(1024 * 100), b""):
76 for chunk in iter(lambda: f.read(1024 * 100), b""):
78 digest.update(chunk)
77 digest.update(chunk)
79
78
80 return digest.hexdigest()
79 return digest.hexdigest()
81
80
82 def __init__(self, base_path, extension_groups=None):
81 def __init__(self, base_path, extension_groups=None):
83
82
84 """
83 """
85 Local file storage
84 Local file storage
86
85
87 :param base_path: the absolute base path where uploads are stored
86 :param base_path: the absolute base path where uploads are stored
88 :param extension_groups: extensions string
87 :param extension_groups: extensions string
89 """
88 """
90
89
91 extension_groups = extension_groups or ['any']
90 extension_groups = extension_groups or ['any']
92 self.base_path = base_path
91 self.base_path = base_path
93 self.extensions = resolve_extensions([], groups=extension_groups)
92 self.extensions = resolve_extensions([], groups=extension_groups)
94
93
95 def store_path(self, filename):
94 def store_path(self, filename):
96 """
95 """
97 Returns absolute file path of the filename, joined to the
96 Returns absolute file path of the filename, joined to the
98 base_path.
97 base_path.
99
98
100 :param filename: base name of file
99 :param filename: base name of file
101 """
100 """
102 sub_store = self._sub_store_from_filename(filename)
101 sub_store = self._sub_store_from_filename(filename)
103 return os.path.join(self.base_path, sub_store, filename)
102 return os.path.join(self.base_path, sub_store, filename)
104
103
105 def delete(self, filename):
104 def delete(self, filename):
106 """
105 """
107 Deletes the filename. Filename is resolved with the
106 Deletes the filename. Filename is resolved with the
108 absolute path based on base_path. If file does not exist,
107 absolute path based on base_path. If file does not exist,
109 returns **False**, otherwise **True**
108 returns **False**, otherwise **True**
110
109
111 :param filename: base name of file
110 :param filename: base name of file
112 """
111 """
113 if self.exists(filename):
112 if self.exists(filename):
114 os.remove(self.store_path(filename))
113 os.remove(self.store_path(filename))
115 return True
114 return True
116 return False
115 return False
117
116
118 def exists(self, filename):
117 def exists(self, filename):
119 """
118 """
120 Checks if file exists. Resolves filename's absolute
119 Checks if file exists. Resolves filename's absolute
121 path based on base_path.
120 path based on base_path.
122
121
123 :param filename: base name of file
122 :param filename: base name of file
124 """
123 """
125 return os.path.exists(self.store_path(filename))
124 return os.path.exists(self.store_path(filename))
126
125
127 def filename_allowed(self, filename, extensions=None):
126 def filename_allowed(self, filename, extensions=None):
128 """Checks if a filename has an allowed extension
127 """Checks if a filename has an allowed extension
129
128
130 :param filename: base name of file
129 :param filename: base name of file
131 :param extensions: iterable of extensions (or self.extensions)
130 :param extensions: iterable of extensions (or self.extensions)
132 """
131 """
133 _, ext = os.path.splitext(filename)
132 _, ext = os.path.splitext(filename)
134 return self.extension_allowed(ext, extensions)
133 return self.extension_allowed(ext, extensions)
135
134
136 def extension_allowed(self, ext, extensions=None):
135 def extension_allowed(self, ext, extensions=None):
137 """
136 """
138 Checks if an extension is permitted. Both e.g. ".jpg" and
137 Checks if an extension is permitted. Both e.g. ".jpg" and
139 "jpg" can be passed in. Extension lookup is case-insensitive.
138 "jpg" can be passed in. Extension lookup is case-insensitive.
140
139
141 :param ext: extension to check
140 :param ext: extension to check
142 :param extensions: iterable of extensions to validate against (or self.extensions)
141 :param extensions: iterable of extensions to validate against (or self.extensions)
143 """
142 """
144
143
145 extensions = extensions or self.extensions
144 extensions = extensions or self.extensions
146 if not extensions:
145 if not extensions:
147 return True
146 return True
148 if ext.startswith('.'):
147 if ext.startswith('.'):
149 ext = ext[1:]
148 ext = ext[1:]
150 return ext.lower() in extensions
149 return ext.lower() in extensions
151
150
152 def save_file(self, file_obj, filename, directory=None, extensions=None,
151 def save_file(self, file_obj, filename, directory=None, extensions=None,
153 extra_metadata=None, **kwargs):
152 extra_metadata=None, **kwargs):
154 """
153 """
155 Saves a file object to the uploads location.
154 Saves a file object to the uploads location.
156 Returns the resolved filename, i.e. the directory +
155 Returns the resolved filename, i.e. the directory +
157 the (randomized/incremented) base name.
156 the (randomized/incremented) base name.
158
157
159 :param file_obj: **cgi.FieldStorage** object (or similar)
158 :param file_obj: **cgi.FieldStorage** object (or similar)
160 :param filename: original filename
159 :param filename: original filename
161 :param directory: relative path of sub-directory
160 :param directory: relative path of sub-directory
162 :param extensions: iterable of allowed extensions, if not default
161 :param extensions: iterable of allowed extensions, if not default
163 :param extra_metadata: extra JSON metadata to store next to the file with .meta suffix
162 :param extra_metadata: extra JSON metadata to store next to the file with .meta suffix
164 """
163 """
165
164
166 extensions = extensions or self.extensions
165 extensions = extensions or self.extensions
167
166
168 if not self.filename_allowed(filename, extensions):
167 if not self.filename_allowed(filename, extensions):
169 raise FileNotAllowedException()
168 raise FileNotAllowedException()
170
169
171 if directory:
170 if directory:
172 dest_directory = os.path.join(self.base_path, directory)
171 dest_directory = os.path.join(self.base_path, directory)
173 else:
172 else:
174 dest_directory = self.base_path
173 dest_directory = self.base_path
175
174
176 if not os.path.exists(dest_directory):
175 if not os.path.exists(dest_directory):
177 os.makedirs(dest_directory)
176 os.makedirs(dest_directory)
178
177
179 filename = utils.uid_filename(filename)
178 filename = utils.uid_filename(filename)
180
179
181 # resolve also produces special sub-dir for file optimized store
180 # resolve also produces special sub-dir for file optimized store
182 filename, path = self.resolve_name(filename, dest_directory)
181 filename, path = self.resolve_name(filename, dest_directory)
183 stored_file_dir = os.path.dirname(path)
182 stored_file_dir = os.path.dirname(path)
184
183
185 file_obj.seek(0)
184 file_obj.seek(0)
186
185
187 with open(path, "wb") as dest:
186 with open(path, "wb") as dest:
188 shutil.copyfileobj(file_obj, dest)
187 shutil.copyfileobj(file_obj, dest)
189
188
190 metadata = {}
189 metadata = {}
191 if extra_metadata:
190 if extra_metadata:
192 metadata = extra_metadata
191 metadata = extra_metadata
193
192
194 size = os.stat(path).st_size
193 size = os.stat(path).st_size
195 file_hash = self.calculate_path_hash(path)
194 file_hash = self.calculate_path_hash(path)
196
195
197 metadata.update(
196 metadata.update(
198 {"filename": filename,
197 {"filename": filename,
199 "size": size,
198 "size": size,
200 "time": time.time(),
199 "time": time.time(),
201 "sha256": file_hash,
200 "sha256": file_hash,
202 "meta_ver": METADATA_VER})
201 "meta_ver": METADATA_VER})
203
202
204 filename_meta = filename + '.meta'
203 filename_meta = filename + '.meta'
205 with open(os.path.join(stored_file_dir, filename_meta), "wb") as dest_meta:
204 with open(os.path.join(stored_file_dir, filename_meta), "wb") as dest_meta:
206 dest_meta.write(json.dumps(metadata))
205 dest_meta.write(json.dumps(metadata))
207
206
208 if directory:
207 if directory:
209 filename = os.path.join(directory, filename)
208 filename = os.path.join(directory, filename)
210
209
211 return filename, metadata
210 return filename, metadata
212
211
213 def get_metadata(self, filename):
212 def get_metadata(self, filename):
214 """
213 """
215 Reads JSON stored metadata for a file
214 Reads JSON stored metadata for a file
216
215
217 :param filename:
216 :param filename:
218 :return:
217 :return:
219 """
218 """
220 filename = self.store_path(filename)
219 filename = self.store_path(filename)
221 filename_meta = filename + '.meta'
220 filename_meta = filename + '.meta'
222
221
223 with open(filename_meta, "rb") as source_meta:
222 with open(filename_meta, "rb") as source_meta:
224 return json.loads(source_meta.read())
223 return json.loads(source_meta.read())
@@ -1,47 +1,54 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2016-2019 RhodeCode GmbH
3 # Copyright (C) 2016-2019 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 import os
23 import uuid
22 import uuid
24
23
24 import pathlib2
25
25
26
26 def get_file_storage(settings):
27 def get_file_storage(settings):
27 from rhodecode.apps.file_store.local_store import LocalFileStorage
28 from rhodecode.apps.file_store.local_store import LocalFileStorage
28 from rhodecode.apps.file_store import config_keys
29 from rhodecode.apps.file_store import config_keys
29 store_path = settings.get(config_keys.store_path)
30 store_path = settings.get(config_keys.store_path)
30 return LocalFileStorage(base_path=store_path)
31 return LocalFileStorage(base_path=store_path)
31
32
32
33
34 def splitext(filename):
35 ext = ''.join(pathlib2.Path(filename).suffixes)
36 return filename, ext
37
38
33 def uid_filename(filename, randomized=True):
39 def uid_filename(filename, randomized=True):
34 """
40 """
35 Generates a randomized or stable (uuid) filename,
41 Generates a randomized or stable (uuid) filename,
36 preserving the original extension.
42 preserving the original extension.
37
43
38 :param filename: the original filename
44 :param filename: the original filename
39 :param randomized: define if filename should be stable (sha1 based) or randomized
45 :param randomized: define if filename should be stable (sha1 based) or randomized
40 """
46 """
41 _, ext = os.path.splitext(filename)
47
48 _, ext = splitext(filename)
42 if randomized:
49 if randomized:
43 uid = uuid.uuid4()
50 uid = uuid.uuid4()
44 else:
51 else:
45 hash_key = '{}.{}'.format(filename, 'store')
52 hash_key = '{}.{}'.format(filename, 'store')
46 uid = uuid.uuid5(uuid.NAMESPACE_URL, hash_key)
53 uid = uuid.uuid5(uuid.NAMESPACE_URL, hash_key)
47 return str(uid) + ext.lower()
54 return str(uid) + ext.lower()
General Comments 0
You need to be logged in to leave comments. Login now