# Copyright (C) 2016-2023 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/
import errno
import os
import hashlib
import functools
import time
import logging

from .. import config_keys
from ..exceptions import FileOverSizeException
from ..backends.base import BaseFileStoreBackend, fsspec, BaseShard, ShardFileReader

from ....lib.ext_json import json

log = logging.getLogger(__name__)


class LegacyFileSystemShard(BaseShard):
    # legacy ver
    METADATA_VER = 'v2'
    BACKEND_TYPE = config_keys.backend_legacy_filesystem
    storage_type: str = 'dir_struct'

    # legacy suffix
    metadata_suffix: str = '.meta'

    @classmethod
    def _sub_store_from_filename(cls, filename):
        return filename[:2]

    @classmethod
    def apply_counter(cls, counter, filename):
        name_counted = '%d-%s' % (counter, filename)
        return name_counted
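
    # Illustrative behaviour of the two helpers above, derived from the code:
    #   _sub_store_from_filename('0-test.jpg') -> '0-'
    #   apply_counter(1, 'test.jpg')           -> '1-test.jpg'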

    @classmethod
    def safe_make_dirs(cls, dir_path):
        if not os.path.exists(dir_path):
            try:
                os.makedirs(dir_path)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
        return

    @classmethod
    def resolve_name(cls, name, directory):
        """
        Resolves a unique name and the correct path. The file initially gets
        a 0 prefix; if a file for that path already exists, the numeric prefix
        is bumped to values > 0, e.g. test.jpg -> 1-test.jpg.

        :param name: base name of file
        :param directory: absolute directory path
        """
        counter = 0
        while True:
            name_counted = cls.apply_counter(counter, name)

            # sub_store prefix to optimize disk usage, e.g. some_path/ab/final_file
            sub_store: str = cls._sub_store_from_filename(name_counted)
            sub_store_path: str = os.path.join(directory, sub_store)
            cls.safe_make_dirs(sub_store_path)

            path = os.path.join(sub_store_path, name_counted)
            if not os.path.exists(path):
                return name_counted, path
            counter += 1

    def __init__(self, index, directory, directory_folder, fs, **settings):
        self._index: int = index
        self._directory: str = directory
        self._directory_folder: str = directory_folder
        self.fs = fs

    @property
    def dir_struct(self) -> str:
        """Cache directory final path."""
        return os.path.join(self._directory, '0-')

    def _write_file(self, full_path, iterator, max_filesize, mode='wb'):
        # ensure dir exists
        destination, _ = os.path.split(full_path)
        if not self.fs.exists(destination):
            self.fs.makedirs(destination)

        writer = self.fs.open(full_path, mode)

        digest = hashlib.sha256()
        oversize_cleanup = False
        with writer:
            size = 0
            for chunk in iterator:
                size += len(chunk)
                digest.update(chunk)
                writer.write(chunk)

                if max_filesize and size > max_filesize:
                    # free up the copied file, and raise exc
                    oversize_cleanup = True
                    break

            writer.flush()
            # Get the file descriptor
            fd = writer.fileno()

            # Sync the file descriptor to disk, helps with NFS cases...
            os.fsync(fd)

        if oversize_cleanup:
            self.fs.rm(full_path)
            raise FileOverSizeException(f'given file is over size limit ({max_filesize}): {full_path}')

        sha256 = digest.hexdigest()
        log.debug('written new artifact under %s, sha256: %s', full_path, sha256)
        return size, sha256

    def _store(self, key: str, uid_key, value_reader, max_filesize: int | None = None, metadata: dict | None = None, **kwargs):
        filename = key
        uid_filename = uid_key

        # NOTE: also apply the N- counter prefix...
        uid_filename, full_path = self.resolve_name(uid_filename, self._directory)

        # STORE METADATA
        # TODO: make it compatible, and backward proof
        _metadata = {
            "version": self.METADATA_VER,

            "filename": filename,
            "filename_uid_path": full_path,
            "filename_uid": uid_filename,

            "sha256": "",  # NOTE: filled in by reader iteration
            "store_time": time.time(),

            "size": 0
        }
        if metadata:
            _metadata.update(metadata)

        read_iterator = iter(functools.partial(value_reader.read, 2**22), b'')
        size, sha256 = self._write_file(full_path, read_iterator, max_filesize)
        _metadata['size'] = size
        _metadata['sha256'] = sha256

        # after storing the artifact, write its metadata next to it
        _metadata_file, metadata_file_path = self.get_metadata_filename(uid_filename)
        with self.fs.open(metadata_file_path, 'wb') as f:
            f.write(json.dumps(_metadata))

        return uid_filename, _metadata
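
    # Sketch of a ``_store`` call above; ``value_reader`` only needs a
    # ``read(size)`` method, so ``io.BytesIO`` works for illustration
    # (filenames here are hypothetical):
    #
    #   uid, meta = shard._store('report.pdf', 'report.pdf', io.BytesIO(b'data'))
    #   # first store yields uid == '0-report.pdf'; meta carries sha256/size/store_time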

    def store_path(self, uid_filename):
        """
        Returns the absolute file path for the given uid_filename
        """

        prefix_dir = ''
        if '/' in uid_filename:
            prefix_dir, filename = uid_filename.split('/')
            sub_store = self._sub_store_from_filename(filename)
        else:
            sub_store = self._sub_store_from_filename(uid_filename)

        return os.path.join(self._directory, prefix_dir, sub_store, uid_filename)
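
    # Resulting layout for a shard rooted at e.g. ``/store`` (path illustrative):
    #   store_path('0-test.jpg') -> '/store/0-/0-test.jpg'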

    def metadata_convert(self, uid_filename, metadata):
        # NOTE: backward compat mode here... this is for files created on a pre-5.2 system
        if 'meta_ver' in metadata:
            full_path = self.store_path(uid_filename)
            metadata = {
                "_converted": True,
                "_org": metadata,
                "version": self.METADATA_VER,
                "store_type": self.BACKEND_TYPE,

                "filename": metadata['filename'],
                "filename_uid_path": full_path,
                "filename_uid": uid_filename,

                "sha256": metadata['sha256'],
                "store_time": metadata['time'],

                "size": metadata['size']
            }
        return metadata
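
    # Sketch of the conversion above for a pre-5.2 record (field values hypothetical):
    #   {'meta_ver': 'v1', 'filename': 't.jpg', 'sha256': '...', 'time': 1600000000, 'size': 5}
    # becomes a v2 dict keeping the original under '_org':
    #   {'_converted': True, '_org': {...}, 'version': 'v2', 'sha256': '...', 'store_time': 1600000000, ...}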

    def _fetch(self, key, presigned_url_expires: int = 0):
        if key not in self:
            log.exception(f'requested key={key} not found in {self}')
            raise KeyError(key)

        metadata = self.get_metadata(key)

        file_path = metadata['filename_uid_path']
        if presigned_url_expires and presigned_url_expires > 0:
            metadata['url'] = self.fs.url(file_path, expires=presigned_url_expires)

        return ShardFileReader(self.fs.open(file_path, 'rb')), metadata

    def delete(self, key):
        return self._delete(key)

    def _delete(self, key):
        if key not in self:
            log.exception(f'requested key={key} not found in {self}')
            raise KeyError(key)

        metadata = self.get_metadata(key)
        metadata_file, metadata_file_path = self.get_metadata_filename(key)
        artifact_file_path = metadata['filename_uid_path']
        self.fs.rm(artifact_file_path)
        self.fs.rm(metadata_file_path)

    def get_metadata_filename(self, uid_filename) -> tuple[str, str]:
        metadata_file: str = f'{uid_filename}{self.metadata_suffix}'
        uid_path_in_store = self.store_path(uid_filename)

        metadata_file_path = f'{uid_path_in_store}{self.metadata_suffix}'
        return metadata_file, metadata_file_path
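
    # e.g. get_metadata_filename('0-test.jpg')
    #   -> ('0-test.jpg.meta', '/store/0-/0-test.jpg.meta')  # root path illustrative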


class LegacyFileSystemBackend(BaseFileStoreBackend):
    _shard_cls = LegacyFileSystemShard

    def __init__(self, settings):
        super().__init__(settings)

        store_dir = self.get_conf(config_keys.legacy_filesystem_storage_path)
        directory = os.path.expanduser(store_dir)

        self._directory = directory
        self._storage_path = directory  # common path for all from BaseCache
        log.debug('Initializing %s file_store instance', self)
        fs = fsspec.filesystem('file')

        if not fs.exists(self._directory):
            fs.makedirs(self._directory, exist_ok=True)

        # legacy system uses a single shard
        self._shards = tuple(
            [
                self._shard_cls(
                    index=0,
                    directory=directory,
                    directory_folder='',
                    fs=fs,
                    **settings,
                )
            ]
        )

    @classmethod
    def get_shard_index(cls, filename: str, num_shards) -> int:
        # the legacy filesystem doesn't shard; it always uses a single shard
        return 0
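

if __name__ == '__main__':
    # Minimal usage sketch, assuming ``BaseFileStoreBackend.__init__`` accepts a
    # plain settings dict exposed through ``get_conf``; the shard-level ``_store``
    # call is illustrative only, and the storage path is hypothetical.
    import io

    settings = {config_keys.legacy_filesystem_storage_path: '/tmp/rc_artifacts'}
    backend = LegacyFileSystemBackend(settings)

    shard = backend._shards[0]
    uid_filename, metadata = shard._store(
        'example.txt',               # original filename
        'example.txt',               # uid key; the 0- counter prefix is applied internally
        io.BytesIO(b'hello world'),  # any object with a read(size) method
    )
    print(uid_filename, metadata['sha256'], metadata['size'])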