local_store.py
268 lines
| 8.7 KiB
| text/x-python
|
PythonLexer
r5088 | # Copyright (C) 2016-2023 RhodeCode GmbH | |||
r4012 | # | |||
# This program is free software: you can redistribute it and/or modify | ||||
# it under the terms of the GNU Affero General Public License, version 3 | ||||
# (only), as published by the Free Software Foundation. | ||||
# | ||||
# This program is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU Affero General Public License | ||||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||||
# | ||||
# This program is dual-licensed. If you wish to learn more about the | ||||
# RhodeCode Enterprise Edition, including its added features, Support services, | ||||
# and proprietary license terms, please see https://rhodecode.com/licenses/ | ||||
import os | ||||
import time | ||||
r4476 | import errno | |||
r4012 | import hashlib | |||
from rhodecode.lib.ext_json import json | ||||
from rhodecode.apps.file_store import utils | ||||
from rhodecode.apps.file_store.extensions import resolve_extensions | ||||
from rhodecode.apps.file_store.exceptions import ( | ||||
FileNotAllowedException, FileOverSizeException) | ||||
METADATA_VER = 'v1' | ||||
r4476 | def safe_make_dirs(dir_path): | |||
if not os.path.exists(dir_path): | ||||
try: | ||||
os.makedirs(dir_path) | ||||
except OSError as e: | ||||
if e.errno != errno.EEXIST: | ||||
raise | ||||
return | ||||
r4012 | class LocalFileStorage(object): | |||
@classmethod | ||||
r4476 | def apply_counter(cls, counter, filename): | |||
name_counted = '%d-%s' % (counter, filename) | ||||
return name_counted | ||||
@classmethod | ||||
r4012 | def resolve_name(cls, name, directory): | |||
""" | ||||
Resolves a unique name and the correct path. If a filename | ||||
for that path already exists then a numeric prefix with values > 0 will be | ||||
added, for example test.jpg -> 1-test.jpg etc. initially file would have 0 prefix. | ||||
:param name: base name of file | ||||
:param directory: absolute directory path | ||||
""" | ||||
counter = 0 | ||||
while True: | ||||
r4476 | name_counted = cls.apply_counter(counter, name) | |||
r4012 | ||||
# sub_store prefix to optimize disk usage, e.g some_path/ab/final_file | ||||
r4476 | sub_store = cls._sub_store_from_filename(name_counted) | |||
r4012 | sub_store_path = os.path.join(directory, sub_store) | |||
r4476 | safe_make_dirs(sub_store_path) | |||
r4012 | ||||
r4476 | path = os.path.join(sub_store_path, name_counted) | |||
r4012 | if not os.path.exists(path): | |||
r4476 | return name_counted, path | |||
r4012 | counter += 1 | |||
@classmethod | ||||
def _sub_store_from_filename(cls, filename): | ||||
return filename[:2] | ||||
@classmethod | ||||
def calculate_path_hash(cls, file_path): | ||||
""" | ||||
Efficient calculation of file_path sha256 sum | ||||
:param file_path: | ||||
:return: sha256sum | ||||
""" | ||||
digest = hashlib.sha256() | ||||
with open(file_path, 'rb') as f: | ||||
for chunk in iter(lambda: f.read(1024 * 100), b""): | ||||
digest.update(chunk) | ||||
return digest.hexdigest() | ||||
def __init__(self, base_path, extension_groups=None): | ||||
""" | ||||
Local file storage | ||||
:param base_path: the absolute base path where uploads are stored | ||||
:param extension_groups: extensions string | ||||
""" | ||||
extension_groups = extension_groups or ['any'] | ||||
self.base_path = base_path | ||||
self.extensions = resolve_extensions([], groups=extension_groups) | ||||
def __repr__(self): | ||||
r5095 | return f'{self.__class__}@{self.base_path}' | |||
r4012 | ||||
def store_path(self, filename): | ||||
""" | ||||
Returns absolute file path of the filename, joined to the | ||||
base_path. | ||||
:param filename: base name of file | ||||
""" | ||||
r4476 | prefix_dir = '' | |||
if '/' in filename: | ||||
prefix_dir, filename = filename.split('/') | ||||
sub_store = self._sub_store_from_filename(filename) | ||||
else: | ||||
sub_store = self._sub_store_from_filename(filename) | ||||
return os.path.join(self.base_path, prefix_dir, sub_store, filename) | ||||
r4012 | ||||
def delete(self, filename): | ||||
""" | ||||
Deletes the filename. Filename is resolved with the | ||||
absolute path based on base_path. If file does not exist, | ||||
returns **False**, otherwise **True** | ||||
:param filename: base name of file | ||||
""" | ||||
if self.exists(filename): | ||||
os.remove(self.store_path(filename)) | ||||
return True | ||||
return False | ||||
def exists(self, filename): | ||||
""" | ||||
Checks if file exists. Resolves filename's absolute | ||||
path based on base_path. | ||||
r4476 | :param filename: file_uid name of file, e.g 0-f62b2b2d-9708-4079-a071-ec3f958448d4.svg | |||
r4012 | """ | |||
return os.path.exists(self.store_path(filename)) | ||||
def filename_allowed(self, filename, extensions=None): | ||||
"""Checks if a filename has an allowed extension | ||||
:param filename: base name of file | ||||
:param extensions: iterable of extensions (or self.extensions) | ||||
""" | ||||
_, ext = os.path.splitext(filename) | ||||
return self.extension_allowed(ext, extensions) | ||||
def extension_allowed(self, ext, extensions=None): | ||||
""" | ||||
Checks if an extension is permitted. Both e.g. ".jpg" and | ||||
"jpg" can be passed in. Extension lookup is case-insensitive. | ||||
:param ext: extension to check | ||||
:param extensions: iterable of extensions to validate against (or self.extensions) | ||||
""" | ||||
def normalize_ext(_ext): | ||||
if _ext.startswith('.'): | ||||
_ext = _ext[1:] | ||||
return _ext.lower() | ||||
extensions = extensions or self.extensions | ||||
if not extensions: | ||||
return True | ||||
ext = normalize_ext(ext) | ||||
return ext in [normalize_ext(x) for x in extensions] | ||||
def save_file(self, file_obj, filename, directory=None, extensions=None, | ||||
r4476 | extra_metadata=None, max_filesize=None, randomized_name=True, **kwargs): | |||
r4012 | """ | |||
Saves a file object to the uploads location. | ||||
Returns the resolved filename, i.e. the directory + | ||||
the (randomized/incremented) base name. | ||||
:param file_obj: **cgi.FieldStorage** object (or similar) | ||||
:param filename: original filename | ||||
:param directory: relative path of sub-directory | ||||
:param extensions: iterable of allowed extensions, if not default | ||||
:param max_filesize: maximum size of file that should be allowed | ||||
r4476 | :param randomized_name: generate random generated UID or fixed based on the filename | |||
r4012 | :param extra_metadata: extra JSON metadata to store next to the file with .meta suffix | |||
""" | ||||
extensions = extensions or self.extensions | ||||
if not self.filename_allowed(filename, extensions): | ||||
raise FileNotAllowedException() | ||||
if directory: | ||||
dest_directory = os.path.join(self.base_path, directory) | ||||
else: | ||||
dest_directory = self.base_path | ||||
r4476 | safe_make_dirs(dest_directory) | |||
r4012 | ||||
r4476 | uid_filename = utils.uid_filename(filename, randomized=randomized_name) | |||
r4012 | ||||
# resolve also produces special sub-dir for file optimized store | ||||
r4476 | filename, path = self.resolve_name(uid_filename, dest_directory) | |||
r4012 | stored_file_dir = os.path.dirname(path) | |||
r4611 | no_body_seek = kwargs.pop('no_body_seek', False) | |||
if no_body_seek: | ||||
pass | ||||
else: | ||||
file_obj.seek(0) | ||||
r4012 | ||||
with open(path, "wb") as dest: | ||||
r4611 | length = 256 * 1024 | |||
while 1: | ||||
buf = file_obj.read(length) | ||||
if not buf: | ||||
break | ||||
dest.write(buf) | ||||
r4012 | ||||
metadata = {} | ||||
if extra_metadata: | ||||
metadata = extra_metadata | ||||
size = os.stat(path).st_size | ||||
if max_filesize and size > max_filesize: | ||||
# free up the copied file, and raise exc | ||||
os.remove(path) | ||||
raise FileOverSizeException() | ||||
file_hash = self.calculate_path_hash(path) | ||||
r4476 | metadata.update({ | |||
"filename": filename, | ||||
r4012 | "size": size, | |||
"time": time.time(), | ||||
"sha256": file_hash, | ||||
r4476 | "meta_ver": METADATA_VER | |||
}) | ||||
r4012 | ||||
filename_meta = filename + '.meta' | ||||
with open(os.path.join(stored_file_dir, filename_meta), "wb") as dest_meta: | ||||
dest_meta.write(json.dumps(metadata)) | ||||
if directory: | ||||
filename = os.path.join(directory, filename) | ||||
return filename, metadata | ||||
r4661 | def get_metadata(self, filename, ignore_missing=False): | |||
r4012 | """ | |||
Reads JSON stored metadata for a file | ||||
:param filename: | ||||
:return: | ||||
""" | ||||
filename = self.store_path(filename) | ||||
filename_meta = filename + '.meta' | ||||
r4661 | if ignore_missing and not os.path.isfile(filename_meta): | |||
return {} | ||||
r4012 | with open(filename_meta, "rb") as source_meta: | |||
return json.loads(source_meta.read()) | ||||