diff --git a/rhodecode/lib/vcs/client_http.py b/rhodecode/lib/vcs/client_http.py --- a/rhodecode/lib/vcs/client_http.py +++ b/rhodecode/lib/vcs/client_http.py @@ -26,7 +26,9 @@ import copy import logging import threading import time -import urllib.request, urllib.error, urllib.parse +import urllib.request +import urllib.error +import urllib.parse import urllib.parse import uuid import traceback @@ -59,6 +61,7 @@ def _remote_call(url, payload, exception for attempt in range(retries): try: response = session.post(url, data=msgpack.packb(payload)) + break except pycurl.error as e: error_code, error_message = e.args if error_code == pycurl.E_RECV_ERROR: @@ -76,12 +79,13 @@ def _remote_call(url, payload, exception raise if response.status_code >= 400: - log.error('Call to %s returned non 200 HTTP code: %s', - url, response.status_code) + content_type = response.content_type + log.error('Call to %s returned non 200 HTTP code: %s [%s]', + url, response.status_code, content_type) raise exceptions.HttpVCSCommunicationError(repr(response.content)) try: - response = msgpack.unpackb(response.content, raw=False) + response = msgpack.unpackb(response.content) except Exception: log.exception('Failed to decode response from msgpack') raise @@ -103,10 +107,20 @@ def _remote_call(url, payload, exception except KeyError: pass - raise exc + exc.add_note(attach_exc_details(error)) + raise exc # raising the org exception from vcsserver return response.get('result') +def attach_exc_details(error): + note = '-- EXC NOTE -- :\n' + note += f'vcs_kind: {error.get("_vcs_kind")}\n' + note += f'org_exc: {error.get("_vcs_kind")}\n' + note += f'tb: {error.get("traceback")}\n' + note += '-- END EXC NOTE --' + return note + + def _streaming_remote_call(url, payload, exceptions_map, session, chunk_size): try: headers = { @@ -166,7 +180,7 @@ class RemoteVCSMaker(object): @classmethod def init_cache_region(cls, repo_id): - cache_namespace_uid = 'cache_repo.{}'.format(repo_id) + cache_namespace_uid = 
'repo.{}'.format(repo_id) region = rc_cache.get_or_create_region('cache_repo', cache_namespace_uid) return region, cache_namespace_uid @@ -267,7 +281,7 @@ class RemoteRepo(object): def get_local_cache(self, name, args): cache_on = False cache_key = '' - local_cache_on = str2bool(rhodecode.CONFIG.get('vcs.methods.cache')) + local_cache_on = rhodecode.ConfigGet().get_bool('vcs.methods.cache') cache_methods = [ 'branches', 'tags', 'bookmarks', @@ -300,7 +314,7 @@ class RemoteRepo(object): namespace=self._cache_namespace, condition=cache_on and cache_key) def remote_call(_cache_key): if self._call_with_logging: - args_repr = f'ARG: {str(args):.256}|KW: {str(kwargs):.256}' + args_repr = f'ARG: {str(args):.512}|KW: {str(kwargs):.512}' log.debug('Calling %s@%s with args:%r. wire_context: %s cache_on: %s', url, name, args_repr, context_uid, cache_on) return _remote_call(url, payload, EXCEPTIONS_MAP, self._session) @@ -323,7 +337,7 @@ class RemoteRepo(object): # Cache is a problem because this is a stream def streaming_remote_call(_cache_key): if self._call_with_logging: - args_repr = f'ARG: {str(args):.256}|KW: {str(kwargs):.256}' + args_repr = f'ARG: {str(args):.512}|KW: {str(kwargs):.512}' log.debug('Calling %s@%s with args:%r. wire_context: %s cache_on: %s', url, name, args_repr, context_uid, cache_on) return _streaming_remote_call(url, payload, EXCEPTIONS_MAP, self._session, self.CHUNK_SIZE) diff --git a/rhodecode/lib/vcs/conf/settings.py b/rhodecode/lib/vcs/conf/settings.py --- a/rhodecode/lib/vcs/conf/settings.py +++ b/rhodecode/lib/vcs/conf/settings.py @@ -22,7 +22,7 @@ Internal settings for vcs-lib """ -# list of default encoding used in safe_unicode/safe_str methods +# list of default encoding used in safe_str methods DEFAULT_ENCODINGS = ['utf8'] diff --git a/rhodecode/lib/vcs/exceptions.py b/rhodecode/lib/vcs/exceptions.py --- a/rhodecode/lib/vcs/exceptions.py +++ b/rhodecode/lib/vcs/exceptions.py @@ -23,7 +23,8 @@ Custom vcs exceptions module. 
""" import logging import functools -import urllib.request, urllib.error, urllib.parse +import urllib.error +import urllib.parse import rhodecode log = logging.getLogger(__name__) @@ -185,12 +186,12 @@ def map_vcs_exceptions(func): try: return func(*args, **kwargs) except Exception as e: - from rhodecode.lib.utils2 import str2bool - debug = str2bool(rhodecode.CONFIG.get('debug')) + debug = rhodecode.ConfigGet().get_bool('debug') # The error middleware adds information if it finds # __traceback_info__ in a frame object. This way the remote # traceback information is made available in error reports. + remote_tb = getattr(e, '_vcs_server_traceback', None) org_remote_tb = getattr(e, '_vcs_server_org_exc_tb', '') __traceback_info__ = None diff --git a/rhodecode/lib/vcs/nodes.py b/rhodecode/lib/vcs/nodes.py --- a/rhodecode/lib/vcs/nodes.py +++ b/rhodecode/lib/vcs/nodes.py @@ -21,16 +21,15 @@ """ Module holding everything related to vcs nodes, with vcs2 architecture. """ - +import functools import os import stat from zope.cachedescriptors.property import Lazy as LazyProperty -import rhodecode from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP -from rhodecode.lib.utils import safe_unicode, safe_str -from rhodecode.lib.utils2 import md5 +from rhodecode.lib.str_utils import safe_str, safe_bytes +from rhodecode.lib.hash_utils import md5 from rhodecode.lib.vcs import path as vcspath from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db @@ -52,6 +51,10 @@ class NodeState: NOT_CHANGED = 'not changed' REMOVED = 'removed' +#TODO: not sure if that should be bytes or str ? 
+# most probably bytes because content should be bytes and we check it +BIN_BYTE_MARKER = b'\0' + class NodeGeneratorBase(object): """ @@ -68,9 +71,10 @@ class NodeGeneratorBase(object): def __call__(self): return [n for n in self] - def __getslice__(self, i, j): - for p in self.current_paths[i:j]: - yield self.cs.get_node(p) + def __getitem__(self, key): + if isinstance(key, slice): + for p in self.current_paths[key.start:key.stop]: + yield self.cs.get_node(p) def __len__(self): return len(self.current_paths) @@ -98,13 +102,15 @@ class RemovedFileNodesGenerator(NodeGene """ def __iter__(self): for p in self.current_paths: - yield RemovedFileNode(path=p) + yield RemovedFileNode(path=safe_bytes(p)) - def __getslice__(self, i, j): - for p in self.current_paths[i:j]: - yield RemovedFileNode(path=p) + def __getitem__(self, key): + if isinstance(key, slice): + for p in self.current_paths[key.start:key.stop]: + yield RemovedFileNode(path=safe_bytes(p)) +@functools.total_ordering class Node(object): """ Simplest class representing file or directory on repository. SCM backends @@ -115,14 +121,19 @@ class Node(object): only. Moreover, every single node is identified by the ``path`` attribute, so it cannot end with slash, too. Otherwise, path could lead to mistakes. 
""" - RTLO_MARKER = "\u202E" # RTLO marker allows swapping text, and certain - # security attacks could be used with this + # RTLO marker allows swapping text, and certain + # security attacks could be used with this + RTLO_MARKER = "\u202E" + commit = None - def __init__(self, path, kind): + def __init__(self, path: bytes, kind): self._validate_path(path) # can throw exception if path is invalid - self.path = safe_str(path.rstrip('/')) # we store paths as str - if path == '' and kind != NodeKind.DIR: + + self.bytes_path = path.rstrip(b'/') # store for __repr__ + self.path = safe_str(self.bytes_path) # we store paths as str + + if self.bytes_path == b'' and kind != NodeKind.DIR: raise NodeError("Only DirNode and its subclasses may be " "initialized with empty path") self.kind = kind @@ -130,12 +141,65 @@ class Node(object): if self.is_root() and not self.is_dir(): raise NodeError("Root node cannot be FILE kind") - def _validate_path(self, path): - if path.startswith('/'): + def __eq__(self, other): + if type(self) is not type(other): + return False + for attr in ['name', 'path', 'kind']: + if getattr(self, attr) != getattr(other, attr): + return False + if self.is_file(): + # FileNode compare, we need to fallback to content compare + return None + else: + # For DirNode's check without entering each dir + self_nodes_paths = list(sorted(n.path for n in self.nodes)) + other_nodes_paths = list(sorted(n.path for n in other.nodes)) + if self_nodes_paths != other_nodes_paths: + return False + return True + + def __lt__(self, other): + if self.kind < other.kind: + return True + if self.kind > other.kind: + return False + if self.path < other.path: + return True + if self.path > other.path: + return False + + # def __cmp__(self, other): + # """ + # Comparator using name of the node, needed for quick list sorting. 
+ # """ + # + # kind_cmp = cmp(self.kind, other.kind) + # if kind_cmp: + # if isinstance(self, SubModuleNode): + # # we make submodules equal to dirnode for "sorting" purposes + # return NodeKind.DIR + # return kind_cmp + # return cmp(self.name, other.name) + + def __repr__(self): + maybe_path = getattr(self, 'path', 'UNKNOWN_PATH') + return f'<{self.__class__.__name__} {maybe_path!r}>' + + def __str__(self): + return self.name + + def _validate_path(self, path: bytes): + self._assert_bytes(path) + + if path.startswith(b'/'): raise NodeError( - "Cannot initialize Node objects with slash at " - "the beginning as only relative paths are supported. " - "Got %s" % (path,)) + f"Cannot initialize Node objects with slash at " + f"the beginning as only relative paths are supported. " + f"Got {path}") + + def _assert_bytes(self, value): + if not isinstance(value, bytes): + raise TypeError(f"Bytes required as input, got {type(value)} of {value}.") @LazyProperty def parent(self): @@ -147,22 +211,13 @@ class Node(object): return None @LazyProperty - def unicode_path(self): - return safe_unicode(self.path) + def str_path(self) -> str: + return safe_str(self.path) @LazyProperty def has_rtlo(self): """Detects if a path has right-to-left-override marker""" - return self.RTLO_MARKER in self.unicode_path - - @LazyProperty - def unicode_path_safe(self): - """ - Special SAFE representation of path without the right-to-left-override. - This should be only used for "showing" the file, cannot be used for any - urls etc. - """ - return safe_unicode(self.path).replace(self.RTLO_MARKER, '') + return self.RTLO_MARKER in self.str_path @LazyProperty def dir_path(self): @@ -172,7 +227,7 @@ class Node(object): """ _parts = self.path.rstrip('/').rsplit('/', 1) if len(_parts) == 2: - return safe_unicode(_parts[0]) + return _parts[0] return '' @LazyProperty @@ -181,7 +236,7 @@ class Node(object): Returns name of the node so if its path then only last part is returned. 
""" - return safe_unicode(self.path.rstrip('/').split('/')[-1]) + return self.path.rstrip('/').split('/')[-1] @property def kind(self): @@ -197,53 +252,15 @@ class Node(object): if self.path.endswith('/'): raise NodeError("Node's path cannot end with slash") - def __cmp__(self, other): - """ - Comparator using name of the node, needed for quick list sorting. - """ - - kind_cmp = cmp(self.kind, other.kind) - if kind_cmp: - if isinstance(self, SubModuleNode): - # we make submodules equal to dirnode for "sorting" purposes - return NodeKind.DIR - return kind_cmp - return cmp(self.name, other.name) - - def __eq__(self, other): - for attr in ['name', 'path', 'kind']: - if getattr(self, attr) != getattr(other, attr): - return False - if self.is_file(): - if self.content != other.content: - return False - else: - # For DirNode's check without entering each dir - self_nodes_paths = list(sorted(n.path for n in self.nodes)) - other_nodes_paths = list(sorted(n.path for n in self.nodes)) - if self_nodes_paths != other_nodes_paths: - return False - return True - - def __ne__(self, other): - return not self.__eq__(other) - - def __repr__(self): - return '<%s %r>' % (self.__class__.__name__, self.path) - - def __str__(self): - return self.__repr__() - - def __unicode__(self): - return self.name - - def get_parent_path(self): + def get_parent_path(self) -> bytes: """ Returns node's parent path or empty string if node is root. """ if self.is_root(): - return '' - return vcspath.dirname(self.path.rstrip('/')) + '/' + return b'' + str_path = vcspath.dirname(self.path.rstrip('/')) + '/' + + return safe_bytes(str_path) def is_file(self): """ @@ -312,7 +329,7 @@ class FileNode(Node): """ _filter_pre_load = [] - def __init__(self, path, content=None, commit=None, mode=None, pre_load=None): + def __init__(self, path: bytes, content: bytes | None = None, commit=None, mode=None, pre_load=None): """ Only one of ``content`` and ``commit`` may be given. 
Passing both would raise ``NodeError`` exception. @@ -324,13 +341,39 @@ class FileNode(Node): """ if content and commit: raise NodeError("Cannot use both content and commit") - super(FileNode, self).__init__(path, kind=NodeKind.FILE) + + super().__init__(path, kind=NodeKind.FILE) + self.commit = commit + if content and not isinstance(content, bytes): + # File content is one thing that inherently must be bytes + # we support passing str too, and convert the content + content = safe_bytes(content) self._content = content self._mode = mode or FILEMODE_DEFAULT self._set_bulk_properties(pre_load) + def __eq__(self, other): + eq = super(FileNode, self).__eq__(other) + if eq is not None: + return eq + return self.content == other.content + + def __hash__(self): + raw_id = getattr(self.commit, 'raw_id', '') + return hash((self.path, raw_id)) + + def __lt__(self, other): + lt = super(FileNode, self).__lt__(other) + if lt is not None: + return lt + return self.content < other.content + + def __repr__(self): + short_id = getattr(self.commit, 'short_id', '') + return f'<{self.__class__.__name__} path={self.path!r}, short_id={short_id}>' + def _set_bulk_properties(self, pre_load): if not pre_load: return @@ -339,11 +382,22 @@ class FileNode(Node): if not pre_load: return - for attr_name in pre_load: - result = getattr(self, attr_name) - if callable(result): - result = result() - self.__dict__[attr_name] = result + remote = self.commit.get_remote() + result = remote.bulk_file_request(self.commit.raw_id, self.path, pre_load) + + for attr, value in result.items(): + if attr == "flags": + self.__dict__['mode'] = safe_str(value) + elif attr == "size": + self.__dict__['size'] = value + elif attr == "data": + self.__dict__['_content'] = value + elif attr == "is_binary": + self.__dict__['is_binary'] = value + elif attr == "md5": + self.__dict__['md5'] = value + else: + raise ValueError(f'Unsupported attr in bulk_property: {attr}') @LazyProperty def mode(self): @@ -358,7 +412,7 @@ class 
FileNode(Node): return mode @LazyProperty - def raw_bytes(self): + def raw_bytes(self) -> bytes: """ Returns lazily the raw bytes of the FileNode. """ @@ -370,6 +424,16 @@ class FileNode(Node): content = self._content return content + def content_uncached(self): + """ + Returns lazily content of the FileNode. + """ + if self.commit: + content = self.commit.get_file_content(self.path) + else: + content = self._content + return content + def stream_bytes(self): """ Returns an iterator that will stream the content of the file directly from @@ -379,13 +443,6 @@ class FileNode(Node): return self.commit.get_file_content_streamed(self.path) raise NodeError("Cannot retrieve stream_bytes without related commit attribute") - @LazyProperty - def md5(self): - """ - Returns md5 of the file node. - """ - return md5(self.raw_bytes) - def metadata_uncached(self): """ Returns md5, binary flag of the file node, without any cache usage. @@ -393,35 +450,26 @@ class FileNode(Node): content = self.content_uncached() - is_binary = content and '\0' in content + is_binary = bool(content and BIN_BYTE_MARKER in content) size = 0 if content: size = len(content) return is_binary, md5(content), size, content - def content_uncached(self): - """ - Returns lazily content of the FileNode. If possible, would try to - decode content from UTF-8. + @LazyProperty + def content(self) -> bytes: """ - if self.commit: - content = self.commit.get_file_content(self.path) - else: - content = self._content + Returns lazily content of the FileNode. + """ + content = self.raw_bytes + if content and not isinstance(content, bytes): + raise ValueError(f'Content is of type {type(content)} instead of bytes') return content @LazyProperty - def content(self): - """ - Returns lazily content of the FileNode. If possible, would try to - decode content from UTF-8. 
- """ - content = self.raw_bytes - - if self.is_binary: - return content - return safe_unicode(content) + def str_content(self) -> str: + return safe_str(self.raw_bytes) @LazyProperty def size(self): @@ -457,7 +505,7 @@ class FileNode(Node): """ if hasattr(self, '_mimetype'): - if (isinstance(self._mimetype, (tuple, list,)) and + if (isinstance(self._mimetype, (tuple, list)) and len(self._mimetype) == 2): return self._mimetype else: @@ -511,7 +559,7 @@ class FileNode(Node): lexer = lexers.guess_lexer_for_filename( filename, content, stripnl=False) except lexers.ClassNotFound: - lexer = None + pass # try our EXTENSION_MAP if not lexer: @@ -520,7 +568,7 @@ class FileNode(Node): if lexer_class: lexer = lexers.get_lexer_by_name(lexer_class[0]) except lexers.ClassNotFound: - lexer = None + pass if not lexer: lexer = lexers.TextLexer(stripnl=False) @@ -533,7 +581,10 @@ class FileNode(Node): Returns pygment's lexer class. Would try to guess lexer taking file's content, name and mimetype. """ - return self.get_lexer(self.name, self.content) + # TODO: this is more proper, but super heavy on investigating the type based on the content + #self.get_lexer(self.name, self.content) + + return self.get_lexer(self.name) @LazyProperty def lexer_alias(self): @@ -583,7 +634,20 @@ class FileNode(Node): return self.commit.is_node_binary(self.path) else: raw_bytes = self._content - return raw_bytes and '\0' in raw_bytes + return bool(raw_bytes and BIN_BYTE_MARKER in raw_bytes) + + @LazyProperty + def md5(self): + """ + Returns md5 of the file node. + """ + + if self.commit: + return self.commit.node_md5_hash(self.path) + else: + raw_bytes = self._content + # TODO: this sucks, we're computing md5 on potentially super big stream data... 
+ return md5(raw_bytes) @LazyProperty def extension(self): @@ -607,20 +671,26 @@ class FileNode(Node): if self.commit: return self.commit.get_largefile_node(self.path) - def count_lines(self, content, count_empty=False): + def count_lines(self, content: str | bytes, count_empty=False): + if isinstance(content, str): + newline_marker = '\n' + elif isinstance(content, bytes): + newline_marker = b'\n' + else: + raise ValueError(f'content must be bytes or str got {type(content)} instead') if count_empty: all_lines = 0 empty_lines = 0 for line in content.splitlines(True): - if line == '\n': + if line == newline_marker: empty_lines += 1 all_lines += 1 return all_lines, all_lines - empty_lines else: # fast method - empty_lines = all_lines = content.count('\n') + empty_lines = all_lines = content.count(newline_marker) if all_lines == 0 and content: # one-line without a newline empty_lines = all_lines = 1 @@ -635,10 +705,6 @@ class FileNode(Node): all_lines, empty_lines = self.count_lines(content, count_empty=count_empty) return all_lines, empty_lines - def __repr__(self): - return '<%s %r @ %s>' % (self.__class__.__name__, self.path, - getattr(self.commit, 'short_id', '')) - class RemovedFileNode(FileNode): """ @@ -648,20 +714,19 @@ class RemovedFileNode(FileNode): """ ALLOWED_ATTRIBUTES = [ 'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind', - 'added', 'changed', 'not_changed', 'removed' + 'added', 'changed', 'not_changed', 'removed', 'bytes_path' ] def __init__(self, path): """ :param path: relative path to the node """ - super(RemovedFileNode, self).__init__(path=path) + super().__init__(path=path) def __getattribute__(self, attr): if attr.startswith('_') or attr in RemovedFileNode.ALLOWED_ATTRIBUTES: - return super(RemovedFileNode, self).__getattribute__(attr) - raise RemovedFileNodeError( - "Cannot access attribute %s on RemovedFileNode" % attr) + return super().__getattribute__(attr) + raise RemovedFileNodeError(f"Cannot access attribute {attr} on 
RemovedFileNode. Not in allowed attributes") @LazyProperty def state(self): @@ -675,7 +740,7 @@ class DirNode(Node): lazily fetch data within same repository's commit. """ - def __init__(self, path, nodes=(), commit=None): + def __init__(self, path, nodes=(), commit=None, default_pre_load=None): """ Only one of ``nodes`` and ``commit`` may be given. Passing both would raise ``NodeError`` exception. @@ -689,16 +754,38 @@ class DirNode(Node): super(DirNode, self).__init__(path, NodeKind.DIR) self.commit = commit self._nodes = nodes + self.default_pre_load = default_pre_load or ['is_binary', 'size'] + + def __iter__(self): + for node in self.nodes: + yield node + + def __eq__(self, other): + eq = super(DirNode, self).__eq__(other) + if eq is not None: + return eq + # check without entering each dir + self_nodes_paths = list(sorted(n.path for n in self.nodes)) + other_nodes_paths = list(sorted(n.path for n in other.nodes)) + return self_nodes_paths == other_nodes_paths + + def __lt__(self, other): + lt = super(DirNode, self).__lt__(other) + if lt is not None: + return lt + # check without entering each dir + self_nodes_paths = list(sorted(n.path for n in self.nodes)) + other_nodes_paths = list(sorted(n.path for n in other.nodes)) + return self_nodes_paths < other_nodes_paths @LazyProperty def content(self): - raise NodeError( - "%s represents a dir and has no `content` attribute" % self) + raise NodeError(f"{self} represents a dir and has no `content` attribute") @LazyProperty def nodes(self): if self.commit: - nodes = self.commit.get_nodes(self.path) + nodes = self.commit.get_nodes(self.path, pre_load=self.default_pre_load) else: nodes = self._nodes self._nodes_dict = dict((node.path, node) for node in nodes) @@ -712,10 +799,6 @@ class DirNode(Node): def dirs(self): return sorted((node for node in self.nodes if node.is_dir())) - def __iter__(self): - for node in self.nodes: - yield node - def get_node(self, path): """ Returns node from within this particular 
``DirNode``, so it is now @@ -755,7 +838,7 @@ class DirNode(Node): else: raise KeyError except KeyError: - raise NodeError("Node does not exist at %s" % path) + raise NodeError(f"Node does not exist at {path}") @LazyProperty def state(self): @@ -780,8 +863,8 @@ class DirNode(Node): "related commit attribute") def __repr__(self): - return '<%s %r @ %s>' % (self.__class__.__name__, self.path, - getattr(self.commit, 'short_id', '')) + short_id = getattr(self.commit, 'short_id', '') + return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>' class RootNode(DirNode): @@ -790,10 +873,10 @@ class RootNode(DirNode): """ def __init__(self, nodes=(), commit=None): - super(RootNode, self).__init__(path='', nodes=nodes, commit=commit) + super(RootNode, self).__init__(path=b'', nodes=nodes, commit=commit) def __repr__(self): - return '<%s>' % self.__class__.__name__ + return f'<{self.__class__.__name__}>' class SubModuleNode(Node): @@ -814,8 +897,8 @@ class SubModuleNode(Node): self.url = url or self._extract_submodule_url() def __repr__(self): - return '<%s %r @ %s>' % (self.__class__.__name__, self.path, - getattr(self.commit, 'short_id', '')) + short_id = getattr(self.commit, 'short_id', '') + return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>' def _extract_submodule_url(self): # TODO: find a way to parse gits submodule file and extract the @@ -828,27 +911,31 @@ class SubModuleNode(Node): Returns name of the node so if its path then only last part is returned. 
""" - org = safe_unicode(self.path.rstrip('/').split('/')[-1]) - return '%s @ %s' % (org, self.commit.short_id) + org = safe_str(self.path.rstrip('/').split('/')[-1]) + return f'{org} @ {self.commit.short_id}' class LargeFileNode(FileNode): def __init__(self, path, url=None, commit=None, alias=None, org_path=None): - self.path = path - self.org_path = org_path + self._validate_path(path) # can throw exception if path is invalid + self.org_path = org_path # as stored in VCS as LF pointer + + self.bytes_path = path.rstrip(b'/') # store for __repr__ + self.path = safe_str(self.bytes_path) # we store paths as str + self.kind = NodeKind.LARGEFILE self.alias = alias - self._content = '' + self._content = b'' - def _validate_path(self, path): + def _validate_path(self, path: bytes): """ - we override check since the LargeFileNode path is system absolute + we override check since the LargeFileNode path is system absolute, but we check for bytes only """ - pass + self._assert_bytes(path) def __repr__(self): - return '<%s %r>' % (self.__class__.__name__, self.path) + return f'<{self.__class__.__name__} {self.org_path} -> {self.path!r}>' @LazyProperty def size(self): diff --git a/rhodecode/lib/vcs/utils/helpers.py b/rhodecode/lib/vcs/utils/helpers.py --- a/rhodecode/lib/vcs/utils/helpers.py +++ b/rhodecode/lib/vcs/utils/helpers.py @@ -55,7 +55,7 @@ def get_scm(path): raise VCSError( 'More than one [%s] scm found at given path %s' % (found, path)) - if len(found_scms) is 0: + if len(found_scms) == 0: raise VCSError('No scm found at given path %s' % path) return found_scms[0]