diff --git a/rhodecode/lib/vcs/__init__.py b/rhodecode/lib/vcs/__init__.py --- a/rhodecode/lib/vcs/__init__.py +++ b/rhodecode/lib/vcs/__init__.py @@ -140,8 +140,8 @@ class CurlSession(object): curl.perform() status_code = curl.getinfo(pycurl.HTTP_CODE) - - return CurlResponse(response_buffer, status_code) + content_type = curl.getinfo(pycurl.CONTENT_TYPE) + return CurlResponse(response_buffer, status_code, content_type) class CurlResponse(object): @@ -153,9 +153,13 @@ class CurlResponse(object): `requests` as a drop in replacement for benchmarking purposes. """ - def __init__(self, response_buffer, status_code): + def __init__(self, response_buffer, status_code, content_type=''): self._response_buffer = response_buffer self._status_code = status_code + self._content_type = content_type + + def __repr__(self): + return f'CurlResponse(code={self._status_code}, content_type={self._content_type})' @property def content(self): @@ -168,6 +172,10 @@ class CurlResponse(object): def status_code(self): return self._status_code + @property + def content_type(self): + return self._content_type + def iter_content(self, chunk_size): self._response_buffer.seek(0) while 1: diff --git a/rhodecode/lib/vcs/backends/__init__.py b/rhodecode/lib/vcs/backends/__init__.py --- a/rhodecode/lib/vcs/backends/__init__.py +++ b/rhodecode/lib/vcs/backends/__init__.py @@ -25,7 +25,7 @@ VCS Backends module import os import logging -from pprint import pformat +from rhodecode import typing from rhodecode.lib.vcs.conf import settings from rhodecode.lib.vcs.exceptions import VCSError @@ -36,7 +36,7 @@ from rhodecode.lib.vcs.utils.imports imp log = logging.getLogger(__name__) -def get_vcs_instance(repo_path, *args, **kwargs): +def get_vcs_instance(repo_path, *args, **kwargs) -> typing.VCSRepo | None: """ Given a path to a repository an instance of the corresponding vcs backend repository class is created and returned. If no repository can be found @@ -54,10 +54,10 @@ def get_vcs_instance(repo_path, *args, * backend = get_backend(vcs_alias) if explicit_vcs_alias: - # do final verification of existance of the path, this does the + # do final verification of existence of the path, this does the # same as get_scm() call which we skip in explicit_vcs_alias if not os.path.isdir(repo_path): - raise VCSError("Given path %s is not a directory" % repo_path) + raise VCSError(f"Given path {repo_path} is not a directory") except VCSError: log.exception( 'Perhaps this repository is in db and not in ' @@ -68,15 +68,15 @@ def get_vcs_instance(repo_path, *args, * return backend(repo_path=repo_path, *args, **kwargs) -def get_backend(alias): +def get_backend(alias) -> typing.VCSRepoClass: """ Returns ``Repository`` class identified by the given alias or raises VCSError if alias is not recognized or backend class cannot be imported. """ if alias not in settings.BACKENDS: raise VCSError( - "Given alias '%s' is not recognized! Allowed aliases:\n%s" % - (alias, pformat(settings.BACKENDS.keys()))) + f"Given alias '{alias}' is not recognized! " + f"Allowed aliases:{settings.BACKENDS.keys()}") backend_path = settings.BACKENDS[alias] klass = import_class(backend_path) return klass diff --git a/rhodecode/lib/vcs/backends/base.py b/rhodecode/lib/vcs/backends/base.py --- a/rhodecode/lib/vcs/backends/base.py +++ b/rhodecode/lib/vcs/backends/base.py @@ -29,7 +29,7 @@ import datetime import fnmatch import itertools import logging -import collections +import dataclasses import warnings from zope.cachedescriptors.property import Lazy as LazyProperty @@ -55,10 +55,17 @@ FILEMODE_DEFAULT = 0o100644 FILEMODE_EXECUTABLE = 0o100755 EMPTY_COMMIT_ID = '0' * 40 -_Reference = collections.namedtuple('Reference', ('type', 'name', 'commit_id')) +@dataclasses.dataclass +class Reference: + type: str + name: str + commit_id: str -class Reference(_Reference): + def __iter__(self): + yield self.type + yield self.name + yield self.commit_id @property def branch(self): @@ -74,8 +81,15 @@ class Reference(_Reference): def to_str(self): return reference_to_unicode(self) + def asdict(self): + return dict( + type=self.type, + name=self.name, + commit_id=self.commit_id + ) -def unicode_to_reference(raw): + +def unicode_to_reference(raw: str): """ Convert a unicode (or string) to a reference object. If unicode evaluates to False it returns None. @@ -220,8 +234,8 @@ class MergeResponse(object): # Deprecations MergeFailureReason._DEPRECATED_MISSING_COMMIT: lazy_ugettext( - u'This pull request cannot be merged because the target or the ' - u'source reference is missing.'), + 'This pull request cannot be merged because the target or the ' + 'source reference is missing.'), } @@ -972,6 +986,9 @@ class BaseCommit(object): d.pop('repository', None) return d + def get_remote(self): + return self._remote + def serialize(self): return self.__json__() @@ -1097,7 +1114,7 @@ class BaseCommit(object): return author_email(self.author) - def get_file_mode(self, path): + def get_file_mode(self, path: bytes): """ Returns stat mode of the file at `path`. """ @@ -1115,7 +1132,13 @@ class BaseCommit(object): """ raise NotImplementedError - def get_file_content(self, path): + def node_md5_hash(self, path): + """ + Returns md5 hash of a node data + """ + raise NotImplementedError + + def get_file_content(self, path) -> bytes: """ Returns content of the file at the given `path`. """ @@ -1168,7 +1191,7 @@ class BaseCommit(object): """ raise NotImplementedError - def get_nodes(self, path): + def get_nodes(self, path, pre_load=None): """ Returns combined ``DirNode`` and ``FileNode`` objects list representing state of commit at the given ``path``. @@ -1194,13 +1217,13 @@ class BaseCommit(object): """ return None - def archive_repo(self, archive_dest_path, kind='tgz', subrepos=None, + def archive_repo(self, archive_name_key, kind='tgz', subrepos=None, archive_dir_name=None, write_metadata=False, mtime=None, - archive_at_path='/'): + archive_at_path='/', cache_config=None): """ Creates an archive containing the contents of the repository. - :param archive_dest_path: path to the file which to create the archive. + :param archive_name_key: unique key under this archive should be generated :param kind: one of following: ``"tbz2"``, ``"tgz"``, ``"zip"``. :param archive_dir_name: name of root directory in archive. Default is repository name and commit's short_id joined with dash: @@ -1209,9 +1232,11 @@ class BaseCommit(object): :param mtime: custom modification time for archive creation, defaults to time.time() if not given. :param archive_at_path: pack files at this path (default '/') + :param cache_config: config spec to send to vcsserver to configure the backend to store files :raise VCSError: If prefix has a problem. """ + cache_config = cache_config or {} allowed_kinds = [x[0] for x in settings.ARCHIVE_SPECS] if kind not in allowed_kinds: raise ImproperArchiveTypeError( @@ -1223,8 +1248,8 @@ class BaseCommit(object): commit_id = self.raw_id return self.repository._remote.archive_repo( - archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id) + archive_name_key, kind, mtime, archive_at_path, + archive_dir_name, commit_id, cache_config) def _validate_archive_prefix(self, archive_dir_name): if archive_dir_name is None: @@ -1232,11 +1257,13 @@ class BaseCommit(object): repo_name=safe_str(self.repository.name), short_id=self.short_id) elif not isinstance(archive_dir_name, str): - raise ValueError("prefix not a bytes object: %s" % repr(archive_dir_name)) + raise ValueError(f"archive_dir_name is not str object but: {type(archive_dir_name)}") elif archive_dir_name.startswith('/'): raise VCSError("Prefix cannot start with leading slash") elif archive_dir_name.strip() == '': raise VCSError("Prefix cannot be empty") + elif not archive_dir_name.isascii(): + raise VCSError("Prefix cannot contain non ascii characters") return archive_dir_name @LazyProperty @@ -1321,14 +1348,28 @@ class BaseCommit(object): """ Similar to os.walk method. Insted of filesystem it walks through commit starting at given ``topurl``. Returns generator of tuples - (topnode, dirnodes, filenodes). + (top_node, dirnodes, filenodes). """ - topnode = self.get_node(topurl) - if not topnode.is_dir(): + from rhodecode.lib.vcs.nodes import DirNode + + if isinstance(topurl, DirNode): + top_node = topurl + else: + top_node = self.get_node(topurl) + + has_default_pre_load = False + if isinstance(top_node, DirNode): + # used to inject as we walk same defaults as given top_node + default_pre_load = top_node.default_pre_load + has_default_pre_load = True + + if not top_node.is_dir(): return - yield (topnode, topnode.dirs, topnode.files) - for dirnode in topnode.dirs: - for tup in self.walk(dirnode.path): + yield top_node, top_node.dirs, top_node.files + for dir_node in top_node.dirs: + if has_default_pre_load: + dir_node.default_pre_load = default_pre_load + for tup in self.walk(dir_node): yield tup def get_filenodes_generator(self): @@ -1345,15 +1386,15 @@ class BaseCommit(object): def no_node_at_path(self, path): return NodeDoesNotExistError( - u"There is no file nor directory at the given path: " - u"`%s` at commit %s" % (safe_unicode(path), self.short_id)) + f"There is no file nor directory at the given path: " + f"`{safe_str(path)}` at commit {self.short_id}") - def _fix_path(self, path): + def _fix_path(self, path: str) -> str: """ Paths are stored without trailing slash so we need to get rid off it if needed. """ - return path.rstrip('/') + return safe_str(path).rstrip('/') # # Deprecated API based on changesets @@ -1380,9 +1421,7 @@ class BaseChangesetClass(type): return isinstance(instance, BaseCommit) -class BaseChangeset(BaseCommit): - - __metaclass__ = BaseChangesetClass +class BaseChangeset(BaseCommit, metaclass=BaseChangesetClass): def __new__(cls, *args, **kwargs): warnings.warn( @@ -1624,9 +1663,7 @@ class BaseInMemoryChangesetClass(type): return isinstance(instance, BaseInMemoryCommit) -class BaseInMemoryChangeset(BaseInMemoryCommit): - - __metaclass__ = BaseInMemoryChangesetClass +class BaseInMemoryChangeset(BaseInMemoryCommit, metaclass=BaseInMemoryChangesetClass): def __new__(cls, *args, **kwargs): warnings.warn( @@ -1676,14 +1713,14 @@ class EmptyCommit(BaseCommit): def id(self): return self.raw_id - def get_path_commit(self, path): + def get_path_commit(self, path, pre_load=None): return self - def get_file_content(self, path): - return u'' + def get_file_content(self, path) -> bytes: + return b'' def get_file_content_streamed(self, path): - yield self.get_file_content() + yield self.get_file_content(path) def get_file_size(self, path): return 0 @@ -1695,9 +1732,7 @@ class EmptyChangesetClass(type): return isinstance(instance, EmptyCommit) -class EmptyChangeset(EmptyCommit): - - __metaclass__ = EmptyChangesetClass +class EmptyChangeset(EmptyCommit, metaclass=EmptyChangesetClass): def __new__(cls, *args, **kwargs): warnings.warn( @@ -1731,7 +1766,7 @@ class EmptyRepository(BaseRepository): def get_diff(self, *args, **kwargs): from rhodecode.lib.vcs.backends.git.diff import GitDiff - return GitDiff('') + return GitDiff(b'') class CollectionGenerator(object): @@ -1739,8 +1774,7 @@ class CollectionGenerator(object): def __init__(self, repo, commit_ids, collection_size=None, pre_load=None, translate_tag=None): self.repo = repo self.commit_ids = commit_ids - # TODO: (oliver) this isn't currently hooked up - self.collection_size = None + self.collection_size = collection_size self.pre_load = pre_load self.translate_tag = translate_tag @@ -1762,11 +1796,16 @@ class CollectionGenerator(object): commit_id=commit_id, pre_load=self.pre_load, translate_tag=self.translate_tag) - def __getslice__(self, i, j): - """ - Returns an iterator of sliced repository - """ - commit_ids = self.commit_ids[i:j] + def __getitem__(self, key): + """Return either a single element by index, or a sliced collection.""" + + if isinstance(key, slice): + commit_ids = self.commit_ids[key.start:key.stop] + + else: + # single item + commit_ids = self.commit_ids[key] + return self.__class__( self.repo, commit_ids, pre_load=self.pre_load, translate_tag=self.translate_tag) @@ -1830,10 +1869,16 @@ class Diff(object): :attr:`_header_re` and :attr:`_meta_re`. """ _meta_re = None - _header_re = None + _header_re: bytes = re.compile(br"") - def __init__(self, raw_diff): - self.raw = raw_diff + def __init__(self, raw_diff: bytes): + if not isinstance(raw_diff, bytes): + raise Exception(f'raw_diff must be bytes - got {type(raw_diff)}') + + self.raw = memoryview(raw_diff) + + def get_header_re(self): + return self._header_re def chunks(self): """ @@ -1842,35 +1887,44 @@ class Diff(object): we can detect last chunk as this was also has special rule """ - diff_parts = ('\n' + self.raw).split('\ndiff --git') - header = diff_parts[0] - - if self._meta_re: - match = self._meta_re.match(header) + diff_parts = (b'\n' + bytes(self.raw)).split(b'\ndiff --git') chunks = diff_parts[1:] total_chunks = len(chunks) - return ( - DiffChunk(chunk, self, cur_chunk == total_chunks) - for cur_chunk, chunk in enumerate(chunks, start=1)) + def diff_iter(_chunks): + for cur_chunk, chunk in enumerate(_chunks, start=1): + yield DiffChunk(chunk, self, cur_chunk == total_chunks) + return diff_iter(chunks) class DiffChunk(object): - def __init__(self, chunk, diff, last_chunk): - self._diff = diff + def __init__(self, chunk: bytes, diff_obj: Diff, is_last_chunk: bool): + self.diff_obj = diff_obj # since we split by \ndiff --git that part is lost from original diff # we need to re-apply it at the end, EXCEPT ! if it's last chunk - if not last_chunk: - chunk += '\n' - - match = self._diff._header_re.match(chunk) + if not is_last_chunk: + chunk += b'\n' + header_re = self.diff_obj.get_header_re() + match = header_re.match(chunk) self.header = match.groupdict() self.diff = chunk[match.end():] self.raw = chunk + @property + def header_as_str(self): + if self.header: + def safe_str_on_bytes(val): + if isinstance(val, bytes): + return safe_str(val) + return val + return {safe_str(k): safe_str_on_bytes(v) for k, v in self.header.items()} + + def __repr__(self): + return f'DiffChunk({self.header_as_str})' + class BasePathPermissionChecker(object): @@ -1885,10 +1939,10 @@ class BasePathPermissionChecker(object): @property def has_full_access(self): - raise NotImplemented() + raise NotImplementedError() def has_access(self, path): - raise NotImplemented() + raise NotImplementedError() class AllPathPermissionChecker(BasePathPermissionChecker): diff --git a/rhodecode/lib/vcs/backends/git/commit.py b/rhodecode/lib/vcs/backends/git/commit.py --- a/rhodecode/lib/vcs/backends/git/commit.py +++ b/rhodecode/lib/vcs/backends/git/commit.py @@ -22,8 +22,6 @@ GIT commit module """ -import re -import io import stat import configparser from itertools import chain @@ -31,9 +29,7 @@ from itertools import chain from zope.cachedescriptors.property import Lazy as LazyProperty from rhodecode.lib.datelib import utcdate_fromtimestamp -from rhodecode.lib.utils import safe_unicode, safe_str -from rhodecode.lib.utils2 import safe_int -from rhodecode.lib.vcs.conf import settings +from rhodecode.lib.str_utils import safe_bytes, safe_str from rhodecode.lib.vcs.backends import base from rhodecode.lib.vcs.exceptions import CommitError, NodeDoesNotExistError from rhodecode.lib.vcs.nodes import ( @@ -92,7 +88,7 @@ class GitCommit(base.BaseCommit): for attr, value in result.items(): if attr in ["author", "message"]: if value: - value = safe_unicode(value) + value = safe_str(value) elif attr == "date": value = utcdate_fromtimestamp(*value) elif attr == "parents": @@ -119,15 +115,15 @@ class GitCommit(base.BaseCommit): @LazyProperty def message(self): - return safe_unicode(self._remote.message(self.id)) + return safe_str(self._remote.message(self.id)) @LazyProperty def committer(self): - return safe_unicode(self._remote.author(self.id)) + return safe_str(self._remote.author(self.id)) @LazyProperty def author(self): - return safe_unicode(self._remote.author(self.id)) + return safe_str(self._remote.author(self.id)) @LazyProperty def date(self): @@ -143,7 +139,7 @@ class GitCommit(base.BaseCommit): @LazyProperty def tags(self): - tags = [safe_unicode(name) for name, + tags = [safe_str(name) for name, commit_id in self.repository.tags.items() if commit_id == self.raw_id] return tags @@ -159,7 +155,7 @@ class GitCommit(base.BaseCommit): def _set_branch(self, branches): if branches: # actually commit can have multiple branches in git - return safe_unicode(branches[0]) + return safe_str(branches[0]) @LazyProperty def branch(self): @@ -167,6 +163,7 @@ class GitCommit(base.BaseCommit): return self._set_branch(branches) def _get_tree_id_for_path(self, path): + path = safe_str(path) if path in self._paths: return self._paths[path] @@ -202,11 +199,10 @@ class GitCommit(base.BaseCommit): return NodeKind.SUBMODULE return None - def _get_filectx(self, path): + def _assert_is_path(self, path): path = self._fix_path(path) if self._get_kind(path) != NodeKind.FILE: - raise CommitError( - "File does not exist for commit %s at '%s'" % (self.raw_id, path)) + raise CommitError(f"File does not exist for commit {self.raw_id} at '{path}'") return path def _get_file_nodes(self): @@ -231,17 +227,19 @@ class GitCommit(base.BaseCommit): def _make_commits(self, commit_ids): def commit_maker(_commit_id): - return self.repository.get_commit(commit_id=commit_id) + return self.repository.get_commit(commit_id=_commit_id) return [commit_maker(commit_id) for commit_id in commit_ids] - def get_file_mode(self, path): + def get_file_mode(self, path: bytes): """ Returns stat mode of the file at the given `path`. """ - path = safe_str(path) + path = self._assert_is_path(path) + # ensure path is traversed self._get_tree_id_for_path(path) + return self._stat_modes[path] def is_link(self, path): @@ -251,6 +249,10 @@ class GitCommit(base.BaseCommit): tree_id, _ = self._get_tree_id_for_path(path) return self._remote.is_binary(tree_id) + def node_md5_hash(self, path): + path = self._assert_is_path(path) + return self._remote.md5_hash(self.raw_id, path) + def get_file_content(self, path): """ Returns content of the file at given `path`. @@ -276,7 +278,7 @@ class GitCommit(base.BaseCommit): which file at given `path` has been modified. """ - path = self._get_filectx(path) + path = self._assert_is_path(path) hist = self._remote.node_history(self.raw_id, path, limit) return [ self.repository.get_commit(commit_id=commit_id, pre_load=pre_load) @@ -296,11 +298,11 @@ class GitCommit(base.BaseCommit): lambda: self.repository.get_commit(commit_id=commit_id, pre_load=pre_load), content) - def get_nodes(self, path): + def get_nodes(self, path, pre_load=None): if self._get_kind(path) != NodeKind.DIR: raise CommitError( - "Directory does not exist for commit %s at '%s'" % (self.raw_id, path)) + f"Directory does not exist for commit {self.raw_id} at '{path}'") path = self._fix_path(path) tree_id, _ = self._get_tree_id_for_path(path) @@ -325,12 +327,11 @@ class GitCommit(base.BaseCommit): self._stat_modes[obj_path] = stat_ if type_ == 'tree': - dirnodes.append(DirNode(obj_path, commit=self)) + dirnodes.append(DirNode(safe_bytes(obj_path), commit=self)) elif type_ == 'blob': - filenodes.append(FileNode(obj_path, commit=self, mode=stat_)) + filenodes.append(FileNode(safe_bytes(obj_path), commit=self, mode=stat_, pre_load=pre_load)) else: - raise CommitError( - "Requested object should be Tree or Blob, is %s", type_) + raise CommitError(f"Requested object should be Tree or Blob, is {type_}") nodes = dirnodes + filenodes for node in nodes: @@ -346,8 +347,8 @@ class GitCommit(base.BaseCommit): tree_id, type_ = self._get_tree_id_for_path(path) except CommitError: raise NodeDoesNotExistError( - "Cannot find one of parents' directories for a given " - "path: %s" % path) + f"Cannot find one of parents' directories for a given " + f"path: {path}") if type_ in ['link', 'commit']: url = self._get_submodule_url(path) @@ -357,9 +358,9 @@ class GitCommit(base.BaseCommit): if path == '': node = RootNode(commit=self) else: - node = DirNode(path, commit=self) + node = DirNode(safe_bytes(path), commit=self) elif type_ == 'blob': - node = FileNode(path, commit=self, pre_load=pre_load) + node = FileNode(safe_bytes(path), commit=self, pre_load=pre_load) self._stat_modes[path] = node.mode else: raise self.no_node_at_path(path) @@ -378,7 +379,7 @@ class GitCommit(base.BaseCommit): file_id = pointer_spec.get('oid_hash') if self._remote.in_largefiles_store(file_id): lf_path = self._remote.store_path(file_id) - return LargeFileNode(lf_path, commit=self, org_path=path) + return LargeFileNode(safe_bytes(lf_path), commit=self, org_path=path) @LazyProperty def affected_files(self): @@ -393,7 +394,6 @@ class GitCommit(base.BaseCommit): added = set() modified = set() deleted = set() - _r = self._remote parents = self.parents if not self.parents: @@ -403,14 +403,11 @@ class GitCommit(base.BaseCommit): oid = None else: oid = parent.raw_id - changes = _r.tree_changes(oid, self.raw_id) - for (oldpath, newpath), (_, _), (_, _) in changes: - if newpath and oldpath: - modified.add(newpath) - elif newpath and not oldpath: - added.add(newpath) - elif not newpath and oldpath: - deleted.add(oldpath) + _added, _modified, _deleted = self._remote.tree_changes(oid, self.raw_id) + added = added | set(_added) + modified = modified | set(_modified) + deleted = deleted | set(_deleted) + return added, modified, deleted def _get_paths_for_status(self, status): diff --git a/rhodecode/lib/vcs/backends/git/diff.py b/rhodecode/lib/vcs/backends/git/diff.py --- a/rhodecode/lib/vcs/backends/git/diff.py +++ b/rhodecode/lib/vcs/backends/git/diff.py @@ -29,7 +29,7 @@ from rhodecode.lib.vcs.backends import b class GitDiff(base.Diff): - _header_re = re.compile(r""" + _header_re = re.compile(br""" #^diff[ ]--git [ ]"?a/(?P.+?)"?[ ]"?b/(?P.+?)"?\n (?:^old[ ]mode[ ](?P\d+)\n diff --git a/rhodecode/lib/vcs/backends/git/inmemory.py b/rhodecode/lib/vcs/backends/git/inmemory.py --- a/rhodecode/lib/vcs/backends/git/inmemory.py +++ b/rhodecode/lib/vcs/backends/git/inmemory.py @@ -23,7 +23,7 @@ GIT inmemory module """ from rhodecode.lib.datelib import date_to_timestamp_plus_offset -from rhodecode.lib.utils import safe_str +from rhodecode.lib.str_utils import safe_str, get_default_encodings from rhodecode.lib.vcs.backends import base @@ -50,23 +50,23 @@ class GitInMemoryCommit(base.BaseInMemor if branch is None: branch = self.repository.DEFAULT_BRANCH_NAME - ENCODING = "UTF-8" - commit_tree = None if self.parents[0]: commit_tree = self.parents[0]._commit['tree'] + encoding = get_default_encodings()[0] updated = [] for node in self.added + self.changed: - - if node.is_binary: - content = node.content - else: - content = node.content.encode(ENCODING) + content = node.content + # TODO: left for reference pre py3 migration, probably need to be removed + # if node.is_binary: + # content = node.content + # else: + # content = node.content.encode(ENCODING) updated.append({ 'path': node.path, - 'node_path': node.name.encode(ENCODING), + 'node_path': node.name, 'content': content, 'mode': node.mode, }) @@ -75,7 +75,6 @@ class GitInMemoryCommit(base.BaseInMemor date, tz = date_to_timestamp_plus_offset(date) - # TODO: johbo: Make kwargs explicit and check if this is needed. author_time = kwargs.pop('author_time', date) author_tz = kwargs.pop('author_timezone', tz) @@ -83,11 +82,13 @@ class GitInMemoryCommit(base.BaseInMemor 'parents': [p._commit['id'] for p in self.parents if p], 'author': safe_str(author), 'committer': safe_str(author), - 'encoding': ENCODING, + 'encoding': encoding, 'message': safe_str(message), + 'commit_time': int(date), + 'commit_timezone': tz, + 'author_time': int(author_time), - 'commit_timezone': tz, 'author_timezone': author_tz, } diff --git a/rhodecode/lib/vcs/backends/git/repository.py b/rhodecode/lib/vcs/backends/git/repository.py --- a/rhodecode/lib/vcs/backends/git/repository.py +++ b/rhodecode/lib/vcs/backends/git/repository.py @@ -31,7 +31,7 @@ from zope.cachedescriptors.property impo from collections import OrderedDict from rhodecode.lib.datelib import ( utcdate_fromtimestamp, makedate, date_astimestamp) -from rhodecode.lib.utils import safe_unicode, safe_str +from rhodecode.lib.hash_utils import safe_str from rhodecode.lib.utils2 import CachedProperty from rhodecode.lib.vcs import connection, path as vcspath from rhodecode.lib.vcs.backends.base import ( @@ -107,7 +107,7 @@ class GitRepository(BaseRepository): :param opts: env options to pass into Subprocess command """ if not isinstance(cmd, list): - raise ValueError('cmd must be a list, got %s instead' % type(cmd)) + raise ValueError(f'cmd must be a list, got {type(cmd)} instead') skip_stderr_log = opts.pop('skip_stderr_log', False) out, err = self._remote.run_git_command(cmd, **opts) @@ -310,7 +310,7 @@ class GitRepository(BaseRepository): @LazyProperty def description(self): description = self._remote.get_description() - return safe_unicode(description or self.DEFAULT_DESCRIPTION) + return safe_str(description or self.DEFAULT_DESCRIPTION) def _get_refs_entries(self, prefix='', reverse=False, strip_prefix=True): if self.is_empty(): @@ -322,7 +322,7 @@ class GitRepository(BaseRepository): ref_name = ref if strip_prefix: ref_name = ref[len(prefix):] - result.append((safe_unicode(ref_name), sha)) + result.append((safe_str(ref_name), sha)) def get_name(entry): return entry[0] @@ -561,10 +561,12 @@ class GitRepository(BaseRepository): ``self.EMPTY_COMMIT`` - in this case, patch showing all the changes since empty state of the repository until ``commit2`` :param commit2: Until which commits changes should be shown. + :param path: :param ignore_whitespace: If set to ``True``, would not show whitespace changes. Defaults to ``False``. :param context: How many lines before/after changed lines should be shown. Defaults to ``3``. + :param path1: """ self._validate_diff_commits(commit1, commit2) if path1 is not None and path1 != path: @@ -579,6 +581,7 @@ class GitRepository(BaseRepository): commit1.raw_id, commit2.raw_id, file_filter=file_filter, opt_ignorews=ignore_whitespace, context=context) + return GitDiff(diff) def strip(self, commit_id, branch_name): @@ -865,8 +868,8 @@ class GitRepository(BaseRepository): # N.B.(skreft): the --no-ff option is used to enforce the creation of a # commit message. We also specify the user who is doing the merge. - cmd = ['-c', 'user.name="%s"' % safe_str(user_name), - '-c', 'user.email=%s' % safe_str(user_email), + cmd = ['-c', f'user.name="{user_name}"', + '-c', f'user.email={user_email}', 'merge', '--no-ff', '-m', safe_str(merge_message)] merge_cmd = cmd + heads diff --git a/rhodecode/lib/vcs/backends/hg/commit.py b/rhodecode/lib/vcs/backends/hg/commit.py --- a/rhodecode/lib/vcs/backends/hg/commit.py +++ b/rhodecode/lib/vcs/backends/hg/commit.py @@ -27,15 +27,14 @@ import os from zope.cachedescriptors.property import Lazy as LazyProperty from rhodecode.lib.datelib import utcdate_fromtimestamp -from rhodecode.lib.utils import safe_str, safe_unicode +from rhodecode.lib.str_utils import safe_bytes, safe_str from rhodecode.lib.vcs import path as vcspath from rhodecode.lib.vcs.backends import base -from rhodecode.lib.vcs.backends.hg.diff import MercurialDiff from rhodecode.lib.vcs.exceptions import CommitError from rhodecode.lib.vcs.nodes import ( AddedFileNodesGenerator, ChangedFileNodesGenerator, DirNode, FileNode, NodeKind, RemovedFileNodesGenerator, RootNode, SubModuleNode, - LargeFileNode, LARGEFILE_PREFIX) + LargeFileNode) from rhodecode.lib.vcs.utils.paths import get_dirs_for_path @@ -62,6 +61,7 @@ class MercurialCommit(base.BaseCommit): # caches self.nodes = {} + self._stat_modes = {} # stat info for paths def _set_bulk_properties(self, pre_load): if not pre_load: @@ -75,9 +75,9 @@ class MercurialCommit(base.BaseCommit): for attr, value in result.items(): if attr in ["author", "branch", "message"]: - value = safe_unicode(value) + value = safe_str(value) elif attr == "affected_files": - value = map(safe_unicode, value) + value = list(map(safe_str, value)) elif attr == "date": value = utcdate_fromtimestamp(*value) elif attr in ["children", "parents"]: @@ -94,7 +94,7 @@ class MercurialCommit(base.BaseCommit): @LazyProperty def branch(self): - return safe_unicode(self._remote.ctx_branch(self.raw_id)) + return safe_str(self._remote.ctx_branch(self.raw_id)) @LazyProperty def bookmarks(self): @@ -105,15 +105,15 @@ class MercurialCommit(base.BaseCommit): @LazyProperty def message(self): - return safe_unicode(self._remote.ctx_description(self.raw_id)) + return safe_str(self._remote.ctx_description(self.raw_id)) @LazyProperty def committer(self): - return safe_unicode(self.author) + return safe_str(self.author) @LazyProperty def author(self): - return safe_unicode(self._remote.ctx_user(self.raw_id)) + return safe_str(self._remote.ctx_user(self.raw_id)) @LazyProperty def date(self): @@ -132,9 +132,10 @@ class MercurialCommit(base.BaseCommit): @LazyProperty def _dir_paths(self): - p = list(set(get_dirs_for_path(*self._file_paths))) - p.insert(0, '') - return p + dir_paths = [''] + dir_paths.extend(list(set(get_dirs_for_path(*self._file_paths)))) + + return dir_paths @LazyProperty def _paths(self): @@ -143,7 +144,7 @@ class MercurialCommit(base.BaseCommit): @LazyProperty def id(self): if self.last: - return u'tip' + return 'tip' return self.short_id @LazyProperty @@ -174,7 +175,7 @@ class MercurialCommit(base.BaseCommit): phase_id = self._remote.ctx_phase(self.raw_id) phase_text = self._get_phase_text(phase_id) - return safe_unicode(phase_text) + return safe_str(phase_text) @LazyProperty def obsolete(self): @@ -194,13 +195,6 @@ class MercurialCommit(base.BaseCommit): children = self._remote.ctx_children(self.raw_id) return self._make_commits(children) - def _fix_path(self, path): - """ - Mercurial keeps filenodes as str so we need to encode from unicode - to str. - """ - return safe_str(super(MercurialCommit, self)._fix_path(path)) - def _get_kind(self, path): path = self._fix_path(path) if path in self._file_paths: @@ -208,43 +202,52 @@ class MercurialCommit(base.BaseCommit): elif path in self._dir_paths: return NodeKind.DIR else: - raise CommitError( - "Node does not exist at the given path '%s'" % (path, )) + raise CommitError(f"Node does not exist at the given path '{path}'") - def _get_filectx(self, path): + def _assert_is_path(self, path) -> str: path = self._fix_path(path) if self._get_kind(path) != NodeKind.FILE: - raise CommitError( - "File does not exist for idx %s at '%s'" % (self.raw_id, path)) + raise CommitError(f"File does not exist for commit {self.raw_id} at '{path}'") + return path - def get_file_mode(self, path): + def get_file_mode(self, path: bytes): """ Returns stat mode of the file at the given ``path``. """ - path = self._get_filectx(path) - if 'x' in self._remote.fctx_flags(self.raw_id, path): + path = self._assert_is_path(path) + + if path not in self._stat_modes: + self._stat_modes[path] = self._remote.fctx_flags(self.raw_id, path) + + if 'x' in self._stat_modes[path]: return base.FILEMODE_EXECUTABLE - else: - return base.FILEMODE_DEFAULT + return base.FILEMODE_DEFAULT def is_link(self, path): - path = self._get_filectx(path) - return 'l' in self._remote.fctx_flags(self.raw_id, path) + path = self._assert_is_path(path) + if path not in self._stat_modes: + self._stat_modes[path] = self._remote.fctx_flags(self.raw_id, path) + + return 'l' in self._stat_modes[path] def is_node_binary(self, path): - path = self._get_filectx(path) + path = self._assert_is_path(path) return self._remote.is_binary(self.raw_id, path) + def node_md5_hash(self, path): + path = self._assert_is_path(path) + return self._remote.md5_hash(self.raw_id, path) + def get_file_content(self, path): """ Returns content of the file at given ``path``. """ - path = self._get_filectx(path) + path = self._assert_is_path(path) return self._remote.fctx_node_data(self.raw_id, path) def get_file_content_streamed(self, path): - path = self._get_filectx(path) + path = self._assert_is_path(path) stream_method = getattr(self._remote, 'stream:fctx_node_data') return stream_method(self.raw_id, path) @@ -252,7 +255,7 @@ class MercurialCommit(base.BaseCommit): """ Returns size of the file at given ``path``. """ - path = self._get_filectx(path) + path = self._assert_is_path(path) return self._remote.fctx_size(self.raw_id, path) def get_path_history(self, path, limit=None, pre_load=None): @@ -260,7 +263,7 @@ class MercurialCommit(base.BaseCommit): Returns history of file as reversed list of `MercurialCommit` objects for which file at given ``path`` has been modified. """ - path = self._get_filectx(path) + path = self._assert_is_path(path) hist = self._remote.node_history(self.raw_id, path, limit) return [ self.repository.get_commit(commit_id=commit_id, pre_load=pre_load) @@ -279,7 +282,7 @@ class MercurialCommit(base.BaseCommit): lambda: self.repository.get_commit(commit_id=commit_id, pre_load=pre_load), content) - def get_nodes(self, path): + def get_nodes(self, path, pre_load=None): """ Returns combined ``DirNode`` and ``FileNode`` objects list representing state of commit at the given ``path``. If node at the given ``path`` @@ -292,14 +295,14 @@ class MercurialCommit(base.BaseCommit): path = self._fix_path(path) filenodes = [ - FileNode(f, commit=self) for f in self._file_paths + FileNode(safe_bytes(f), commit=self, pre_load=pre_load) for f in self._file_paths if os.path.dirname(f) == path] # TODO: johbo: Check if this can be done in a more obvious way dirs = path == '' and '' or [ d for d in self._dir_paths if d and vcspath.dirname(d) == path] dirnodes = [ - DirNode(d, commit=self) for d in dirs + DirNode(safe_bytes(d), commit=self) for d in dirs if os.path.dirname(d) == path] alias = self.repository.alias @@ -326,12 +329,12 @@ class MercurialCommit(base.BaseCommit): if path not in self.nodes: if path in self._file_paths: - node = FileNode(path, commit=self, pre_load=pre_load) + node = FileNode(safe_bytes(path), commit=self, pre_load=pre_load) elif path in self._dir_paths: if path == '': node = RootNode(commit=self) else: - node = DirNode(path, commit=self) + node = DirNode(safe_bytes(path), commit=self) else: raise self.no_node_at_path(path) @@ -347,11 +350,11 @@ class MercurialCommit(base.BaseCommit): if self._remote.in_largefiles_store(file_id): lf_path = self._remote.store_path(file_id) - return LargeFileNode(lf_path, commit=self, org_path=path) + return LargeFileNode(safe_bytes(lf_path), commit=self, org_path=path) elif self._remote.in_user_cache(file_id): lf_path = self._remote.store_path(file_id) self._remote.link(file_id, path) - return LargeFileNode(lf_path, commit=self, org_path=path) + return LargeFileNode(safe_bytes(lf_path), commit=self, org_path=path) @LazyProperty def _submodules(self): diff --git a/rhodecode/lib/vcs/backends/hg/diff.py b/rhodecode/lib/vcs/backends/hg/diff.py --- a/rhodecode/lib/vcs/backends/hg/diff.py +++ b/rhodecode/lib/vcs/backends/hg/diff.py @@ -29,7 +29,7 @@ from rhodecode.lib.vcs.backends import b class MercurialDiff(base.Diff): - _header_re = re.compile(r""" + _header_re = re.compile(br""" #^diff[ ]--git [ ]"?a/(?P.+?)"?[ ]"?b/(?P.+?)"?\n (?:^old[ ]mode[ ](?P\d+)\n diff --git a/rhodecode/lib/vcs/backends/hg/inmemory.py b/rhodecode/lib/vcs/backends/hg/inmemory.py --- a/rhodecode/lib/vcs/backends/hg/inmemory.py +++ b/rhodecode/lib/vcs/backends/hg/inmemory.py @@ -23,7 +23,7 @@ HG inmemory module """ from rhodecode.lib.datelib import date_to_timestamp_plus_offset -from rhodecode.lib.utils import safe_str +from rhodecode.lib.str_utils import safe_str from rhodecode.lib.vcs.backends.base import BaseInMemoryCommit from rhodecode.lib.vcs.exceptions import RepositoryError @@ -65,14 +65,14 @@ class MercurialInMemoryCommit(BaseInMemo parent_ids = [p.raw_id if p else None for p in self.parents] - ENCODING = "UTF-8" - updated = [] for node in self.added + self.changed: - if node.is_binary: - content = node.content - else: - content = node.content.encode(ENCODING) + content = node.content + # TODO: left for reference pre py3 migration, probably need to be removed + # if node.is_binary: + # content = node.content + # else: + # content = node.content.encode(ENCODING) updated.append({ 'path': node.path, 'content': content, diff --git a/rhodecode/lib/vcs/backends/hg/repository.py b/rhodecode/lib/vcs/backends/hg/repository.py --- a/rhodecode/lib/vcs/backends/hg/repository.py +++ b/rhodecode/lib/vcs/backends/hg/repository.py @@ -25,14 +25,16 @@ import os import logging import binascii import configparser -import urllib.request, urllib.parse, urllib.error +import urllib.request +import urllib.parse +import urllib.error from zope.cachedescriptors.property import Lazy as LazyProperty from collections import OrderedDict from rhodecode.lib.datelib import ( date_to_timestamp_plus_offset, utcdate_fromtimestamp, makedate) -from rhodecode.lib.utils import safe_unicode, safe_str +from rhodecode.lib.str_utils import safe_str from rhodecode.lib.utils2 import CachedProperty from rhodecode.lib.vcs import connection, exceptions from rhodecode.lib.vcs.backends.base import ( @@ -135,7 +137,7 @@ class MercurialRepository(BaseRepository def get_name(ctx): return ctx[0] - _branches = [(safe_unicode(n), hexlify(h),) for n, h in + _branches = [(n, h,) for n, h in self._remote.branches(active, closed).items()] return OrderedDict(sorted(_branches, key=get_name, reverse=False)) @@ -154,7 +156,7 @@ class MercurialRepository(BaseRepository def get_name(ctx): return ctx[0] - _tags = [(safe_unicode(n), hexlify(h),) for n, h in + _tags = [(n, h,) for n, h in self._remote.tags().items()] return OrderedDict(sorted(_tags, key=get_name, reverse=True)) @@ -230,7 +232,7 @@ class MercurialRepository(BaseRepository return ctx[0] _bookmarks = [ - (safe_unicode(n), hexlify(h)) for n, h in + (n, h) for n, h in self._remote.bookmarks().items()] return OrderedDict(sorted(_bookmarks, key=get_name)) @@ -365,8 +367,7 @@ class MercurialRepository(BaseRepository """ if create and os.path.exists(self.path): raise RepositoryError( - "Cannot create repository at %s, location already exist" - % self.path) + f"Cannot create repository at {self.path}, location already exist") if src_url: url = str(self._get_url(src_url)) @@ -379,6 +380,7 @@ class MercurialRepository(BaseRepository if create: os.makedirs(self.path, mode=0o755) + self._remote.localrepository(create) @LazyProperty @@ -389,14 +391,14 @@ class MercurialRepository(BaseRepository def description(self): description = self._remote.get_config_value( 'web', 'description', untrusted=True) - return safe_unicode(description or self.DEFAULT_DESCRIPTION) + return safe_str(description or self.DEFAULT_DESCRIPTION) @LazyProperty def contact(self): contact = ( self._remote.get_config_value("web", "contact") or self._remote.get_config_value("ui", "username")) - return safe_unicode(contact or self.DEFAULT_CONTACT) + return safe_str(contact or self.DEFAULT_CONTACT) @LazyProperty def last_change(self): @@ -425,7 +427,6 @@ class MercurialRepository(BaseRepository to filesystem (``file:///``) schema. """ - url = url.encode('utf8') if url != 'default' and '://' not in url: url = "file:" + urllib.request.pathname2url(url) return url @@ -467,10 +468,7 @@ class MercurialRepository(BaseRepository else: commit_id = "tip" - #TODO: decide if we pass bytes or str into lookup ? - # if isinstance(commit_id, unicode): - # commit_id = safe_str(commit_id) - + # case here is no cached version, do an actual lookup instead try: raw_id, idx = self._remote.lookup(commit_id, both=True) except CommitDoesNotExistError: @@ -1009,5 +1007,9 @@ class MercurialRepository(BaseRepository class MercurialIndexBasedCollectionGenerator(CollectionGenerator): def _commit_factory(self, commit_id): - return self.repo.get_commit( - commit_idx=commit_id, pre_load=self.pre_load) + if isinstance(commit_id, int): + return self.repo.get_commit( + commit_idx=commit_id, pre_load=self.pre_load) + else: + return self.repo.get_commit( + commit_id=commit_id, pre_load=self.pre_load) diff --git a/rhodecode/lib/vcs/backends/svn/commit.py b/rhodecode/lib/vcs/backends/svn/commit.py --- a/rhodecode/lib/vcs/backends/svn/commit.py +++ b/rhodecode/lib/vcs/backends/svn/commit.py @@ -26,10 +26,10 @@ SVN commit module import dateutil.parser from zope.cachedescriptors.property import Lazy as LazyProperty -from rhodecode.lib.utils import safe_str, safe_unicode +from rhodecode.lib.str_utils import safe_bytes, safe_str from rhodecode.lib.vcs import nodes, path as vcspath from rhodecode.lib.vcs.backends import base -from rhodecode.lib.vcs.exceptions import CommitError, NodeDoesNotExistError +from rhodecode.lib.vcs.exceptions import CommitError _SVN_PROP_TRUE = '*' @@ -63,7 +63,7 @@ class SubversionCommit(base.BaseCommit): @property def author(self): - return safe_unicode(self._properties.get('svn:author')) + return safe_str(self._properties.get('svn:author')) @property def date(self): @@ -71,7 +71,7 @@ class SubversionCommit(base.BaseCommit): @property def message(self): - return safe_unicode(self._properties.get('svn:log')) + return safe_str(self._properties.get('svn:log')) @LazyProperty def _properties(self): @@ -93,7 +93,7 @@ class SubversionCommit(base.BaseCommit): return [child] return [] - def get_file_mode(self, path): + def get_file_mode(self, path: bytes): # Note: Subversion flags files which are executable with a special # property `svn:executable` which is set to the value ``"*"``. if self._get_file_property(path, 'svn:executable') == _SVN_PROP_TRUE: @@ -105,13 +105,17 @@ class SubversionCommit(base.BaseCommit): # Note: Subversion has a flag for special files, the content of the # file contains the type of that file. if self._get_file_property(path, 'svn:special') == _SVN_PROP_TRUE: - return self.get_file_content(path).startswith('link') + return self.get_file_content(path).startswith(b'link') return False def is_node_binary(self, path): path = self._fix_path(path) return self._remote.is_binary(self._svn_rev, safe_str(path)) + def node_md5_hash(self, path): + path = self._fix_path(path) + return self._remote.md5_hash(self._svn_rev, safe_str(path)) + def _get_file_property(self, path, name): file_properties = self._remote.node_properties( safe_str(path), self._svn_rev) @@ -119,16 +123,17 @@ class SubversionCommit(base.BaseCommit): def get_file_content(self, path): path = self._fix_path(path) - return self._remote.get_file_content(safe_str(path), self._svn_rev) + return self._remote.get_file_content(self._svn_rev, safe_str(path)) def get_file_content_streamed(self, path): path = self._fix_path(path) + stream_method = getattr(self._remote, 'stream:get_file_content') - return stream_method(safe_str(path), self._svn_rev) + return stream_method(self._svn_rev, safe_str(path)) def get_file_size(self, path): path = self._fix_path(path) - return self._remote.get_file_size(safe_str(path), self._svn_rev) + return self._remote.get_file_size(self._svn_rev, safe_str(path)) def get_path_history(self, path, limit=None, pre_load=None): path = safe_str(self._fix_path(path)) @@ -156,34 +161,32 @@ class SubversionCommit(base.BaseCommit): if path == '': node = nodes.RootNode(commit=self) else: - node_type = self._remote.get_node_type( - safe_str(path), self._svn_rev) + node_type = self._remote.get_node_type(self._svn_rev, safe_str(path)) if node_type == 'dir': - node = nodes.DirNode(path, commit=self) + node = nodes.DirNode(safe_bytes(path), commit=self) elif node_type == 'file': - node = nodes.FileNode(path, commit=self, pre_load=pre_load) + node = nodes.FileNode(safe_bytes(path), commit=self, pre_load=pre_load) else: raise self.no_node_at_path(path) self.nodes[path] = node return self.nodes[path] - def get_nodes(self, path): + def get_nodes(self, path, pre_load=None): if self._get_kind(path) != nodes.NodeKind.DIR: raise CommitError( - "Directory does not exist for commit %s at " - " '%s'" % (self.raw_id, path)) + f"Directory does not exist for commit {self.raw_id} at '{path}'") path = safe_str(self._fix_path(path)) path_nodes = [] - for name, kind in self._remote.get_nodes(path, revision=self._svn_rev): + for name, kind in self._remote.get_nodes(self._svn_rev, path): node_path = vcspath.join(path, name) if kind == 'dir': - node = nodes.DirNode(node_path, commit=self) + node = nodes.DirNode(safe_bytes(node_path), commit=self) elif kind == 'file': - node = nodes.FileNode(node_path, commit=self) + node = nodes.FileNode(safe_bytes(node_path), commit=self, pre_load=pre_load) else: - raise ValueError("Node kind %s not supported." % (kind, )) + raise ValueError(f"Node kind {kind} not supported.") self.nodes[node_path] = node path_nodes.append(node) @@ -191,7 +194,7 @@ class SubversionCommit(base.BaseCommit): def _get_kind(self, path): path = self._fix_path(path) - kind = self._remote.get_node_type(path, self._svn_rev) + kind = self._remote.get_node_type(self._svn_rev, path) if kind == 'file': return nodes.NodeKind.FILE elif kind == 'dir': diff --git a/rhodecode/lib/vcs/backends/svn/diff.py b/rhodecode/lib/vcs/backends/svn/diff.py --- a/rhodecode/lib/vcs/backends/svn/diff.py +++ b/rhodecode/lib/vcs/backends/svn/diff.py @@ -29,11 +29,11 @@ from rhodecode.lib.vcs.backends import b class SubversionDiff(base.Diff): - _meta_re = re.compile(r""" + _meta_re = re.compile(br""" (?:^(?PCannot[ ]display:[ ]file[ ]marked[ ]as[ ]a[ ]binary[ ]type.)(?:\n|$))? """, re.VERBOSE | re.MULTILINE) - _header_re = re.compile(r""" + _header_re = re.compile(br""" #^diff[ ]--git [ ]"?a/(?P.+?)"?[ ]"?b/(?P.+?)"?\n (?:^similarity[ ]index[ ](?P\d+)%\n diff --git a/rhodecode/lib/vcs/backends/svn/inmemory.py b/rhodecode/lib/vcs/backends/svn/inmemory.py --- a/rhodecode/lib/vcs/backends/svn/inmemory.py +++ b/rhodecode/lib/vcs/backends/svn/inmemory.py @@ -24,7 +24,7 @@ SVN inmemory module """ from rhodecode.lib.datelib import date_astimestamp -from rhodecode.lib.utils import safe_str +from rhodecode.lib.str_utils import safe_str, safe_bytes from rhodecode.lib.vcs.backends import base @@ -42,8 +42,8 @@ class SubversionInMemoryCommit(base.Base updated = [] for node in self.added: node_data = { - 'path': node.path, - 'content': safe_str(node.content), + 'path': safe_bytes(node.path), + 'content': node.content, 'mode': node.mode, } if node.is_binary: @@ -53,15 +53,15 @@ class SubversionInMemoryCommit(base.Base updated.append(node_data) for node in self.changed: updated.append({ - 'path': node.path, - 'content': safe_str(node.content), + 'path': safe_bytes(node.path), + 'content': node.content, 'mode': node.mode, }) removed = [] for node in self.removed: removed.append({ - 'path': node.path, + 'path': safe_bytes(node.path), }) timestamp = date_astimestamp(date) if date else None diff --git a/rhodecode/lib/vcs/backends/svn/repository.py b/rhodecode/lib/vcs/backends/svn/repository.py --- a/rhodecode/lib/vcs/backends/svn/repository.py +++ b/rhodecode/lib/vcs/backends/svn/repository.py @@ -24,13 +24,15 @@ SVN repository module import logging import os -import urllib.request, urllib.parse, urllib.error +import urllib.request +import urllib.parse +import urllib.error from zope.cachedescriptors.property import Lazy as LazyProperty from collections import OrderedDict from rhodecode.lib.datelib import date_astimestamp -from rhodecode.lib.utils import safe_str, safe_unicode +from rhodecode.lib.str_utils import safe_str from rhodecode.lib.utils2 import CachedProperty from rhodecode.lib.vcs import connection, path as vcspath from rhodecode.lib.vcs.backends import base @@ -88,8 +90,8 @@ class SubversionRepository(base.BaseRepo def _init_repo(self, create, src_url): if create and os.path.exists(self.path): raise RepositoryError( - "Cannot create repository at %s, location already exist" - % self.path) + f"Cannot create repository at {self.path}, location already exist" + ) if create: self._remote.create_repository(settings.SVN_COMPATIBLE_VERSION) @@ -116,7 +118,7 @@ class SubversionRepository(base.BaseRepo :param opts: env options to pass into Subprocess command """ if not isinstance(cmd, list): - raise ValueError('cmd must be a list, got %s instead' % type(cmd)) + raise ValueError(f'cmd must be a list, got {type(cmd)} instead') skip_stderr_log = opts.pop('skip_stderr_log', False) out, err = self._remote.run_svn_command(cmd, **opts) @@ -165,10 +167,7 @@ class SubversionRepository(base.BaseRepo directories = (tip.get_node(pattern), ) except NodeDoesNotExistError: continue - found_items.update( - (safe_unicode(n.path), - self.commit_ids[-1]) - for n in directories) + found_items.update((safe_str(n.path), self.commit_ids[-1]) for n in directories) def get_name(item): return item[0] @@ -329,7 +328,7 @@ class SubversionRepository(base.BaseRepo # TODO: johbo: Reconsider impact of DEFAULT_BRANCH_NAME here if branch_name not in [None, self.DEFAULT_BRANCH_NAME]: - svn_rev = long(self.commit_ids[-1]) + svn_rev = int(self.commit_ids[-1]) commit_ids = self._remote.node_history( path=branch_name, revision=svn_rev, limit=None) commit_ids = [str(i) for i in reversed(commit_ids)] @@ -356,8 +355,8 @@ class SubversionRepository(base.BaseRepo self, commit1, commit2, path=None, ignore_whitespace=False, context=3, path1=None): self._validate_diff_commits(commit1, commit2) - svn_rev1 = long(commit1.raw_id) - svn_rev2 = long(commit2.raw_id) + svn_rev1 = int(commit1.raw_id) + svn_rev2 = int(commit2.raw_id) diff = self._remote.diff( svn_rev1, svn_rev2, path1=path1, path2=path, ignore_whitespace=ignore_whitespace, context=context)