nodes.py
963 lines
| 30.0 KiB
| text/x-python
|
PythonLexer
r5054 | ||||
r1 | ||||
r4306 | # Copyright (C) 2014-2020 RhodeCode GmbH | |||
r1 | # | |||
# This program is free software: you can redistribute it and/or modify | ||||
# it under the terms of the GNU Affero General Public License, version 3 | ||||
# (only), as published by the Free Software Foundation. | ||||
# | ||||
# This program is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
# GNU General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU Affero General Public License | ||||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||||
# | ||||
# This program is dual-licensed. If you wish to learn more about the | ||||
# RhodeCode Enterprise Edition, including its added features, Support services, | ||||
# and proprietary license terms, please see https://rhodecode.com/licenses/ | ||||
""" | ||||
Module holding everything related to vcs nodes, with vcs2 architecture. | ||||
""" | ||||
r5075 | import functools | |||
r1577 | import os | |||
r1 | import stat | |||
from zope.cachedescriptors.property import Lazy as LazyProperty | ||||
r796 | from rhodecode.config.conf import LANGUAGES_EXTENSIONS_MAP | |||
r5075 | from rhodecode.lib.str_utils import safe_str, safe_bytes | |||
from rhodecode.lib.hash_utils import md5 | ||||
r1 | from rhodecode.lib.vcs import path as vcspath | |||
from rhodecode.lib.vcs.backends.base import EmptyCommit, FILEMODE_DEFAULT | ||||
from rhodecode.lib.vcs.conf.mtypes import get_mimetypes_db | ||||
from rhodecode.lib.vcs.exceptions import NodeError, RemovedFileNodeError | ||||
# NOTE(review): presumably the path prefix under which Mercurial keeps
# largefile pointer files -- not referenced in this part of the module,
# confirm against callers.
LARGEFILE_PREFIX = '.hglf'
class NodeKind:
    """Integer codes stored in ``Node.kind`` to tell node types apart."""

    # checked by Node.is_submodule()
    SUBMODULE = -1
    # checked by Node.is_dir() / Node.is_root()
    DIR = 1
    # checked by Node.is_file()
    FILE = 2
    # checked by Node.is_largefile()
    LARGEFILE = 3
class NodeState:
    """String labels describing how a node relates to its commit."""

    ADDED = 'added'
    CHANGED = 'changed'
    NOT_CHANGED = 'not changed'
    REMOVED = 'removed'
r1 | ||||
# TODO: not sure whether this should be bytes or str. Most probably bytes,
# because file content is expected to be bytes and the marker is looked up
# inside that content to decide whether a file is binary.
BIN_BYTE_MARKER = b'\0'
r1 | ||||
class NodeGeneratorBase(object):
    """
    Base class for the removed, added and changed file-node collections.

    The collection is lazy: it only keeps the paths and asks the commit
    (``cs``) for the actual node when iterated, indexed or called.
    ``len()`` works on the stored paths alone and creates no nodes.
    """

    def __init__(self, current_paths, cs):
        """
        :param current_paths: sequence of paths this collection covers
        :param cs: commit-like object exposing ``get_node(path)``
        """
        self.cs = cs
        self.current_paths = current_paths

    def __call__(self):
        """Return all nodes as a list."""
        return list(self)

    def __getitem__(self, key):
        """
        Return the node at integer index ``key``, or a lazy iterator of
        nodes when ``key`` is a slice.

        This used to be a generator function, so an integer index silently
        produced an empty generator instead of a node, and the slice step
        was ignored.
        """
        if isinstance(key, slice):
            # stay lazy for slices: nodes are created only when consumed
            return (self.cs.get_node(p) for p in self.current_paths[key])
        return self.cs.get_node(self.current_paths[key])

    def __len__(self):
        return len(self.current_paths)

    def __iter__(self):
        for p in self.current_paths:
            yield self.cs.get_node(p)
class AddedFileNodesGenerator(NodeGeneratorBase):
    """
    Lazy collection of the files added in the current commit.
    """
class ChangedFileNodesGenerator(NodeGeneratorBase):
    """
    Lazy collection of the files changed in the current commit.
    """
class RemovedFileNodesGenerator(NodeGeneratorBase):
    """
    Class holding removed files for current commit.

    Removed files cannot be fetched from the commit, so every path is
    wrapped in a ``RemovedFileNode`` placeholder instead.
    """

    def __iter__(self):
        for p in self.current_paths:
            yield RemovedFileNode(path=safe_bytes(p))

    def __getitem__(self, key):
        """
        Return the ``RemovedFileNode`` at integer index ``key``, or a lazy
        iterator of them when ``key`` is a slice.

        This used to be a generator function, so an integer index silently
        produced an empty generator, and the slice step was ignored.
        """
        if isinstance(key, slice):
            return (
                RemovedFileNode(path=safe_bytes(p))
                for p in self.current_paths[key]
            )
        return RemovedFileNode(path=safe_bytes(self.current_paths[key]))
r1 | ||||
@functools.total_ordering
class Node(object):
    """
    Simplest class representing file or directory on repository. SCM backends
    should use ``FileNode`` and ``DirNode`` subclasses rather than ``Node``
    directly.

    Node's ``path`` cannot start with slash as we operate on *relative* paths
    only. Moreover, every single node is identified by the ``path`` attribute,
    so it cannot end with slash, too. Otherwise, path could lead to mistakes.
    """
    # RTLO marker allows swapping text, and certain
    # security attacks could be used with this
    RTLO_MARKER = "\u202E"

    commit = None

    def __init__(self, path: bytes, kind):
        """
        :param path: relative path as bytes; trailing slashes are stripped
        :param kind: one of the ``NodeKind`` values
        :raises TypeError: if ``path`` is not bytes
        :raises NodeError: if ``path`` starts with a slash, or is empty for
            anything other than a directory
        """
        self._validate_path(path)  # can throw exception if path is invalid

        self.bytes_path = path.rstrip(b'/')  # store for __repr__
        self.path = safe_str(self.bytes_path)  # we store paths as str

        if self.bytes_path == b'' and kind != NodeKind.DIR:
            raise NodeError("Only DirNode and its subclasses may be "
                            "initialized with empty path")
        self.kind = kind

        if self.is_root() and not self.is_dir():
            raise NodeError("Root node cannot be FILE kind")

    def __eq__(self, other):
        """
        Compare by type, then ``name``, ``path`` and ``kind``.

        Returns ``None`` for file-like nodes (file, largefile) whose
        attributes match, so that subclasses can fall back to comparing
        content. Directories are additionally compared by the sorted paths
        of their direct children, without entering each directory.
        """
        if type(self) is not type(other):
            return False

        for attr in ('name', 'path', 'kind'):
            if getattr(self, attr) != getattr(other, attr):
                return False

        if self.is_dir():
            # compare against the *other* node's children; comparing
            # self.nodes with itself made this check always succeed
            self_nodes_paths = sorted(n.path for n in self.nodes)
            other_nodes_paths = sorted(n.path for n in other.nodes)
            return self_nodes_paths == other_nodes_paths

        if self.is_submodule():
            # submodules have neither children nor content to compare
            return True

        # FileNode compare, we need to fallback to content compare
        return None

    def __lt__(self, other):
        """
        Order by ``kind`` first, then by ``path``.

        Returns ``None`` when both are equal so that subclasses can apply
        their own tie-breaker.
        """
        if self.kind != other.kind:
            return self.kind < other.kind
        if self.path != other.path:
            return self.path < other.path
        return None

    def __repr__(self):
        maybe_path = getattr(self, 'path', 'UNKNOWN_PATH')
        return f'<{self.__class__.__name__} {maybe_path!r}>'

    def __str__(self):
        return self.name

    def _validate_path(self, path: bytes):
        """Reject non-bytes paths and paths starting with a slash."""
        self._assert_bytes(path)
        if path.startswith(b'/'):
            raise NodeError(
                f"Cannot initialize Node objects with slash at "
                f"the beginning as only relative paths are supported. "
                f"Got {path}")

    def _assert_bytes(self, value):
        if not isinstance(value, bytes):
            raise TypeError(f"Bytes required as input, got {type(value)} of {value}.")

    @LazyProperty
    def parent(self):
        """
        Parent node, or ``None`` when there is no parent path.

        The parent is fetched from the commit when one is attached,
        otherwise a detached ``DirNode`` is created.
        """
        parent_path = self.get_parent_path()
        if parent_path:
            if self.commit:
                return self.commit.get_node(parent_path)
            return DirNode(parent_path)
        return None

    @LazyProperty
    def str_path(self) -> str:
        return safe_str(self.path)

    @LazyProperty
    def has_rtlo(self):
        """Detects if a path has right-to-left-override marker"""
        return self.RTLO_MARKER in self.str_path

    @LazyProperty
    def dir_path(self):
        """
        Returns name of the directory from full path of this vcs node. Empty
        string is returned if there's no directory in the path
        """
        _parts = self.path.rstrip('/').rsplit('/', 1)
        if len(_parts) == 2:
            return _parts[0]
        return ''

    @LazyProperty
    def name(self):
        """
        Returns name of the node so if its path
        then only last part is returned.
        """
        return self.path.rstrip('/').split('/')[-1]

    @property
    def kind(self):
        return self._kind

    @kind.setter
    def kind(self, kind):
        """Set the kind once; any later change raises ``NodeError``."""
        if hasattr(self, '_kind'):
            raise NodeError("Cannot change node's kind")
        else:
            self._kind = kind
            # Post setter check (path's trailing slash)
            if self.path.endswith('/'):
                raise NodeError("Node's path cannot end with slash")

    def get_parent_path(self) -> bytes:
        """
        Returns node's parent path or empty string if node is root.
        """
        if self.is_root():
            return b''
        str_path = vcspath.dirname(self.path.rstrip('/')) + '/'

        return safe_bytes(str_path)

    def is_file(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.FILE``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.FILE

    def is_dir(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.DIR``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.DIR

    def is_root(self):
        """
        Returns ``True`` if node is a root node and ``False`` otherwise.
        """
        return self.kind == NodeKind.DIR and self.path == ''

    def is_submodule(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.SUBMODULE``, ``False``
        otherwise.
        """
        return self.kind == NodeKind.SUBMODULE

    def is_largefile(self):
        """
        Returns ``True`` if node's kind is ``NodeKind.LARGEFILE``, ``False``
        otherwise
        """
        return self.kind == NodeKind.LARGEFILE

    def is_link(self):
        """Ask the attached commit whether this path is a link."""
        if self.commit:
            return self.commit.is_link(self.path)
        return False

    # The flags below rely on a ``state`` attribute that subclasses provide.
    # Values are compared with ``==`` rather than ``is`` because they are
    # plain strings.

    @LazyProperty
    def added(self):
        return self.state == NodeState.ADDED

    @LazyProperty
    def changed(self):
        return self.state == NodeState.CHANGED

    @LazyProperty
    def not_changed(self):
        return self.state == NodeState.NOT_CHANGED

    @LazyProperty
    def removed(self):
        return self.state == NodeState.REMOVED
class FileNode(Node):
    """
    Class representing file nodes.

    :attribute: path: path to the node, relative to repository's root
    :attribute: content: if given arbitrary sets content of the file
    :attribute: commit: if given, first time content is accessed, callback
    :attribute: mode: stat mode for a node. Default is `FILEMODE_DEFAULT`.
    """
    # names listed here are dropped from ``pre_load`` before the bulk request
    _filter_pre_load = []

    def __init__(self, path: bytes, content: bytes | None = None, commit=None, mode=None, pre_load=None):
        """
        Only one of ``content`` and ``commit`` may be given. Passing both
        would raise ``NodeError`` exception.

        :param path: relative path to the node
        :param content: content may be passed to constructor
        :param commit: if given, will use it to lazily fetch content
        :param mode: ST_MODE (i.e. 0100644)
        :param pre_load: optional list of attribute names to fetch from the
            commit's remote in one bulk request
        """
        if content and commit:
            raise NodeError("Cannot use both content and commit")

        super().__init__(path, kind=NodeKind.FILE)

        self.commit = commit
        if content and not isinstance(content, bytes):
            # File content is one thing that inherently must be bytes
            # we support passing str too, and convert the content
            content = safe_bytes(content)
        self._content = content
        self._mode = mode or FILEMODE_DEFAULT

        self._set_bulk_properties(pre_load)

    def __eq__(self, other):
        """Equal when the base attributes match and content is identical."""
        eq = super(FileNode, self).__eq__(other)
        if eq is not None:
            return eq
        return self.content == other.content

    def __hash__(self):
        raw_id = getattr(self.commit, 'raw_id', '')
        return hash((self.path, raw_id))

    def __lt__(self, other):
        """Order by kind and path, using content as the tie-breaker."""
        lt = super(FileNode, self).__lt__(other)
        if lt is not None:
            return lt
        return self.content < other.content

    def __repr__(self):
        short_id = getattr(self.commit, 'short_id', '')
        return f'<{self.__class__.__name__} path={self.path!r}, short_id={short_id}>'

    def _set_bulk_properties(self, pre_load):
        """
        Fetch the requested attributes in one remote call and store them
        directly in ``__dict__``, so the lazy properties of the same name
        are not computed later.

        :raises ValueError: if the remote returns an unknown attribute
        """
        if not pre_load:
            return
        pre_load = [entry for entry in pre_load
                    if entry not in self._filter_pre_load]
        if not pre_load:
            return

        remote = self.commit.get_remote()
        result = remote.bulk_file_request(self.commit.raw_id, self.path, pre_load)

        for attr, value in result.items():
            if attr == "flags":
                self.__dict__['mode'] = safe_str(value)
            elif attr == "size":
                self.__dict__['size'] = value
            elif attr == "data":
                self.__dict__['_content'] = value
            elif attr == "is_binary":
                self.__dict__['is_binary'] = value
            elif attr == "md5":
                self.__dict__['md5'] = value
            else:
                raise ValueError(f'Unsupported attr in bulk_property: {attr}')

    @LazyProperty
    def mode(self):
        """
        Returns lazily mode of the FileNode. If `commit` is not set, would
        use value given at initialization or `FILEMODE_DEFAULT` (default).
        """
        if self.commit:
            mode = self.commit.get_file_mode(self.path)
        else:
            mode = self._mode
        return mode

    @LazyProperty
    def raw_bytes(self) -> bytes:
        """
        Returns lazily the raw bytes of the FileNode.
        """
        if self.commit:
            if self._content is None:
                self._content = self.commit.get_file_content(self.path)
            content = self._content
        else:
            content = self._content
        return content

    def content_uncached(self):
        """
        Returns content of the FileNode, asking the commit on every call
        when one is attached.
        """
        if self.commit:
            content = self.commit.get_file_content(self.path)
        else:
            content = self._content
        return content

    def stream_bytes(self):
        """
        Returns an iterator that will stream the content of the file directly from
        vcsserver without loading it to memory.
        """
        if self.commit:
            return self.commit.get_file_content_streamed(self.path)
        raise NodeError("Cannot retrieve stream_bytes without related commit attribute")

    def metadata_uncached(self):
        """
        Returns ``(is_binary, md5, size, content)`` of the file node,
        without any cache usage.
        """
        content = self.content_uncached()

        is_binary = bool(content and BIN_BYTE_MARKER in content)
        size = 0
        if content:
            size = len(content)

        return is_binary, md5(content), size, content

    @LazyProperty
    def content(self) -> bytes:
        """
        Returns lazily content of the FileNode.

        :raises ValueError: if the stored content is non-empty and not bytes
        """
        content = self.raw_bytes
        if content and not isinstance(content, bytes):
            raise ValueError(f'Content is of type {type(content)} instead of bytes')
        return content

    @LazyProperty
    def str_content(self) -> str:
        return safe_str(self.raw_bytes)

    @LazyProperty
    def size(self):
        if self.commit:
            return self.commit.get_file_size(self.path)
        raise NodeError(
            "Cannot retrieve size of the file without related "
            "commit attribute")

    @LazyProperty
    def message(self):
        if self.commit:
            return self.last_commit.message
        raise NodeError(
            "Cannot retrieve message of the file without related "
            "commit attribute")

    @LazyProperty
    def last_commit(self):
        if self.commit:
            pre_load = ["author", "date", "message", "parents"]
            return self.commit.get_path_commit(self.path, pre_load=pre_load)
        raise NodeError(
            "Cannot retrieve last commit of the file without "
            "related commit attribute")

    def get_mimetype(self):
        """
        Mimetype is calculated based on the file's content. If ``_mimetype``
        attribute is available, it will be returned (backends which store
        mimetypes or can easily recognize them, should set this private
        attribute to indicate that type should *NOT* be calculated).

        :returns: ``(mimetype, encoding)``
        :raises NodeError: if ``_mimetype`` is set but is not a two-element
            list or tuple
        """
        if hasattr(self, '_mimetype'):
            if (isinstance(self._mimetype, (tuple, list)) and
                    len(self._mimetype) == 2):
                return self._mimetype
            else:
                raise NodeError('given _mimetype attribute must be an 2 '
                                'element list or tuple')

        db = get_mimetypes_db()
        mtype, encoding = db.guess_type(self.name)

        if mtype is None:
            if not self.is_largefile() and self.is_binary:
                mtype = 'application/octet-stream'
                encoding = None
            else:
                mtype = 'text/plain'
                encoding = None

                # try with pygments
                try:
                    from pygments.lexers import get_lexer_for_filename
                    mt = get_lexer_for_filename(self.name).mimetypes
                except Exception:
                    mt = None

                if mt:
                    mtype = mt[0]

        return mtype, encoding

    @LazyProperty
    def mimetype(self):
        """
        Wrapper around full mimetype info. It returns only type of fetched
        mimetype without the encoding part. use get_mimetype function to fetch
        full set of (type,encoding)
        """
        return self.get_mimetype()[0]

    @LazyProperty
    def mimetype_main(self):
        return self.mimetype.split('/')[0]

    @classmethod
    def get_lexer(cls, filename, content=None):
        """
        Pick a pygments lexer for ``filename``.

        Tries pygments' own guess first, then ``LANGUAGES_EXTENSIONS_MAP``
        keyed by file extension, and finally falls back to ``TextLexer``.
        """
        from pygments import lexers

        extension = filename.split('.')[-1]
        lexer = None

        try:
            lexer = lexers.guess_lexer_for_filename(
                filename, content, stripnl=False)
        except lexers.ClassNotFound:
            pass

        # try our EXTENSION_MAP
        if not lexer:
            try:
                lexer_class = LANGUAGES_EXTENSIONS_MAP.get(extension)
                if lexer_class:
                    lexer = lexers.get_lexer_by_name(lexer_class[0])
            except lexers.ClassNotFound:
                pass

        if not lexer:
            lexer = lexers.TextLexer(stripnl=False)

        return lexer

    @LazyProperty
    def lexer(self):
        """
        Returns the pygments lexer chosen by ``get_lexer`` for this file's
        name. Content is deliberately not passed in, see the TODO below.
        """
        # TODO: this is more proper, but super heavy on investigating the type based on the content
        #self.get_lexer(self.name, self.content)
        return self.get_lexer(self.name)

    @LazyProperty
    def lexer_alias(self):
        """
        Returns first alias of the lexer guessed for this file.
        """
        return self.lexer.aliases[0]

    @LazyProperty
    def history(self):
        """
        Returns a list of commit for this file in which the file was changed
        """
        if self.commit is None:
            raise NodeError('Unable to get commit for this FileNode')
        return self.commit.get_path_history(self.path)

    @LazyProperty
    def annotate(self):
        """
        Returns a list of three element tuples with lineno, commit and line
        """
        if self.commit is None:
            raise NodeError('Unable to get commit for this FileNode')
        pre_load = ["author", "date", "message", "parents"]
        return self.commit.get_file_annotate(self.path, pre_load=pre_load)

    @LazyProperty
    def state(self):
        """
        ``NodeState`` of this file, based on the commit's ``added`` and
        ``changed`` collections.

        :raises NodeError: if no commit is attached
        """
        if not self.commit:
            raise NodeError(
                "Cannot check state of the node if it's not "
                "linked with commit")
        elif self.path in (node.path for node in self.commit.added):
            return NodeState.ADDED
        elif self.path in (node.path for node in self.commit.changed):
            return NodeState.CHANGED
        else:
            return NodeState.NOT_CHANGED

    @LazyProperty
    def is_binary(self):
        """
        Returns True if file has binary content.
        """
        if self.commit:
            return self.commit.is_node_binary(self.path)
        else:
            raw_bytes = self._content
            return bool(raw_bytes and BIN_BYTE_MARKER in raw_bytes)

    @LazyProperty
    def md5(self):
        """
        Returns md5 of the file node.
        """
        if self.commit:
            return self.commit.node_md5_hash(self.path)
        else:
            raw_bytes = self._content
            # TODO: this sucks, we're computing md5 on potentially super big stream data...
            return md5(raw_bytes)

    @LazyProperty
    def extension(self):
        """Returns filenode extension"""
        return self.name.split('.')[-1]

    @property
    def is_executable(self):
        """
        Returns ``True`` if file has executable flag turned on.
        """
        return bool(self.mode & stat.S_IXUSR)

    def get_largefile_node(self):
        """
        Try to return a Mercurial FileNode from this node. It does internal
        checks inside largefile store, if that file exist there it will
        create special instance of LargeFileNode which can get content from
        LF store.

        Returns ``None`` when no commit is attached.
        """
        if self.commit:
            return self.commit.get_largefile_node(self.path)
        return None

    def count_lines(self, content: str | bytes, count_empty=False):
        """
        Count lines in ``content``.

        :param content: text or bytes to inspect
        :param count_empty: when true, walk every line and return
            ``(all_lines, non_empty_lines)``; otherwise use the fast path,
            which counts newline markers and returns the same number twice
        :raises ValueError: if ``content`` is neither ``str`` nor ``bytes``
        """
        if isinstance(content, str):
            newline_marker = '\n'
        elif isinstance(content, bytes):
            newline_marker = b'\n'
        else:
            # the f-prefix was missing, so the type was never interpolated
            raise ValueError(f'content must be bytes or str got {type(content)} instead')

        if count_empty:
            all_lines = 0
            empty_lines = 0
            for line in content.splitlines(True):
                if line == newline_marker:
                    empty_lines += 1
                all_lines += 1

            return all_lines, all_lines - empty_lines
        else:
            # fast method
            empty_lines = all_lines = content.count(newline_marker)
            if all_lines == 0 and content:
                # one-line without a newline
                empty_lines = all_lines = 1

            return all_lines, empty_lines

    def lines(self, count_empty=False):
        """
        Line counts for this file as returned by ``count_lines``.

        Binary files and files without content yield ``(0, 0)``.
        """
        all_lines, empty_lines = 0, 0

        if not self.is_binary:
            content = self.content
            # content may be None for a detached node created without
            # content; count_lines would reject that with ValueError
            if content:
                all_lines, empty_lines = self.count_lines(content, count_empty=count_empty)
        return all_lines, empty_lines
class RemovedFileNode(FileNode):
    """
    Dummy FileNode class - trying to access any public attribute that is not
    listed in ``ALLOWED_ATTRIBUTES`` raises RemovedFileNodeError.
    Underscore-prefixed attributes are always accessible.
    """
    ALLOWED_ATTRIBUTES = [
        'name', 'path', 'state', 'is_root', 'is_file', 'is_dir', 'kind',
        'added', 'changed', 'not_changed', 'removed', 'bytes_path'
    ]

    def __init__(self, path):
        """
        :param path: relative path to the node
        """
        super().__init__(path=path)

    def __getattribute__(self, attr):
        if attr.startswith('_') or attr in RemovedFileNode.ALLOWED_ATTRIBUTES:
            return super().__getattribute__(attr)
        raise RemovedFileNodeError(f"Cannot access attribute {attr} on RemovedFileNode. Not in allowed attributes")

    def __repr__(self):
        # The inherited FileNode.__repr__ reads ``self.commit``, which is not
        # an allowed attribute here, so repr() itself used to raise.
        return f'<{self.__class__.__name__} path={self.path!r}>'

    @LazyProperty
    def state(self):
        return NodeState.REMOVED
class DirNode(Node):
    """
    DirNode stores list of files and directories within this node.
    Nodes may be used standalone but within repository context they
    lazily fetch data within same repository's commit.
    """

    def __init__(self, path, nodes=(), commit=None, default_pre_load=None):
        """
        Only one of ``nodes`` and ``commit`` may be given. Passing both
        would raise ``NodeError`` exception.

        :param path: relative path to the node
        :param nodes: content may be passed to constructor
        :param commit: if given, will use it to lazily fetch content
        :param default_pre_load: attribute names passed as ``pre_load`` when
            child nodes are fetched from the commit; defaults to
            ``['is_binary', 'size']``
        """
        if nodes and commit:
            raise NodeError("Cannot use both nodes and commit")
        super(DirNode, self).__init__(path, NodeKind.DIR)
        self.commit = commit
        self._nodes = nodes
        self.default_pre_load = default_pre_load or ['is_binary', 'size']

    def __iter__(self):
        yield from self.nodes

    def __eq__(self, other):
        """Equal when base attributes and direct child paths match."""
        eq = super(DirNode, self).__eq__(other)
        if eq is not None:
            return eq
        # check without entering each dir; the right-hand side must come
        # from ``other`` - it used to be built from ``self`` as well
        self_nodes_paths = sorted(n.path for n in self.nodes)
        other_nodes_paths = sorted(n.path for n in other.nodes)
        return self_nodes_paths == other_nodes_paths

    def __lt__(self, other):
        """Order by kind and path, then by sorted direct child paths."""
        lt = super(DirNode, self).__lt__(other)
        if lt is not None:
            return lt
        # check without entering each dir; the right-hand side must come
        # from ``other`` - it used to be built from ``self`` as well
        self_nodes_paths = sorted(n.path for n in self.nodes)
        other_nodes_paths = sorted(n.path for n in other.nodes)
        return self_nodes_paths < other_nodes_paths

    @LazyProperty
    def content(self):
        raise NodeError(f"{self} represents a dir and has no `content` attribute")

    @LazyProperty
    def nodes(self):
        """
        Sorted list of direct children.

        Children come from the commit when one is attached, otherwise from
        the nodes given at construction. As a side effect ``_nodes_dict``
        (path -> node) is populated for ``get_node``.
        """
        if self.commit:
            nodes = self.commit.get_nodes(self.path, pre_load=self.default_pre_load)
        else:
            nodes = self._nodes
        self._nodes_dict = {node.path: node for node in nodes}
        return sorted(nodes)

    @LazyProperty
    def files(self):
        return sorted(node for node in self.nodes if node.is_file())

    @LazyProperty
    def dirs(self):
        return sorted(node for node in self.nodes if node.is_dir())

    def get_node(self, path):
        """
        Returns node from within this particular ``DirNode``.

        A single path segment is looked up among the direct children.
        A deeper path such as 'api/index.rst' is resolved one segment at a
        time and requires a commit; it is equivalent to::

            docs = root.get_node('docs')
            docs.get_node('api').get_node('index.rst')

        :param: path - relative to the current node
        :raises NodeError: if the path is empty, the node does not exist, or
            a deeper path is requested without a commit

        .. note::
            To access lazily (as in example above) node have to be initialized
            with related commit object - without it node is out of
            context and may know nothing about anything else than nearest
            (located at same level) nodes.
        """
        try:
            path = path.rstrip('/')
            if path == '':
                raise NodeError("Cannot retrieve node without path")
            self.nodes  # access nodes first in order to set _nodes_dict
            paths = path.split('/')
            if len(paths) == 1:
                if not self.is_root():
                    path = '/'.join((self.path, paths[0]))
                else:
                    path = paths[0]
                return self._nodes_dict[path]
            elif len(paths) > 1:
                if self.commit is None:
                    raise NodeError("Cannot access deeper nodes without commit")
                else:
                    path1, path2 = paths[0], '/'.join(paths[1:])
                    return self.get_node(path1).get_node(path2)
            else:
                raise KeyError
        except KeyError:
            raise NodeError(f"Node does not exist at {path}")

    @LazyProperty
    def state(self):
        raise NodeError("Cannot access state of DirNode")

    @LazyProperty
    def size(self):
        """Sum of ``size`` over every file yielded by ``commit.walk``."""
        size = 0
        for root, dirs, files in self.commit.walk(self.path):
            for f in files:
                size += f.size

        return size

    @LazyProperty
    def last_commit(self):
        if self.commit:
            pre_load = ["author", "date", "message", "parents"]
            return self.commit.get_path_commit(self.path, pre_load=pre_load)
        raise NodeError(
            "Cannot retrieve last commit of the file without "
            "related commit attribute")

    def __repr__(self):
        short_id = getattr(self.commit, 'short_id', '')
        return f'<{self.__class__.__name__} {self.path!r} @ {short_id}>'
r1 | ||||
class RootNode(DirNode):
    """
    The repository's top-level directory: a ``DirNode`` with an empty path.
    """

    def __init__(self, nodes=(), commit=None):
        # the root is always addressed by the empty bytes path
        super().__init__(path=b'', nodes=nodes, commit=commit)

    def __repr__(self):
        class_name = self.__class__.__name__
        return f'<{class_name}>'
r1 | ||||
class SubModuleNode(Node):
    """
    Node standing for a Git submodule or a Mercurial subrepo.
    """
    is_binary = False
    size = 0

    def __init__(self, name, url=None, commit=None, alias=None):
        # path must be set before kind: the kind setter inspects self.path
        self.path = name
        self.kind = NodeKind.SUBMODULE
        self.alias = alias
        # we have to use EmptyCommit here since this can point to svn/git/hg
        # submodules we cannot get from repository
        self.commit = EmptyCommit(str(commit), alias=alias)
        if not url:
            url = self._extract_submodule_url()
        self.url = url

    def __repr__(self):
        class_name = self.__class__.__name__
        short_id = getattr(self.commit, 'short_id', '')
        return f'<{class_name} {self.path!r} @ {short_id}>'

    def _extract_submodule_url(self):
        # TODO: find a way to parse gits submodule file and extract the
        # linking URL
        return self.path

    @LazyProperty
    def name(self):
        """
        Last segment of the path, followed by `` @ `` and the short id of
        the commit the submodule points at.
        """
        last_segment = self.path.rstrip('/').split('/')[-1]
        org = safe_str(last_segment)
        return f'{org} @ {self.commit.short_id}'
r1 | ||||
class LargeFileNode(FileNode):
    """
    File node whose content is read from a file on the local filesystem.

    ``path`` is the location that gets opened and stat-ed; ``org_path`` is
    the path as stored in the VCS as a largefile pointer.
    """

    def __init__(self, path, url=None, commit=None, alias=None, org_path=None):
        self._validate_path(path)  # can throw exception if path is invalid

        stripped = path.rstrip(b'/')
        self.org_path = org_path  # as stored in VCS as LF pointer
        self.bytes_path = stripped  # store for __repr__
        self.path = safe_str(stripped)  # we store paths as str
        self.kind = NodeKind.LARGEFILE
        self.alias = alias
        self._content = b''

    def _validate_path(self, path: bytes):
        """
        Only the bytes check is kept: the LargeFileNode path is system
        absolute, so the leading-slash rule of ``Node`` is skipped.
        """
        self._assert_bytes(path)

    def __repr__(self):
        class_name = self.__class__.__name__
        return f'<{class_name} {self.org_path} -> {self.path!r}>'

    @LazyProperty
    def size(self):
        stat_result = os.stat(self.path)
        return stat_result.st_size

    @LazyProperty
    def raw_bytes(self):
        with open(self.path, 'rb') as f:
            return f.read()

    @LazyProperty
    def name(self):
        """
        The node is named after the original largefile path.
        """
        return self.org_path

    def stream_bytes(self):
        """Yield the file's content in chunks of 16 KiB."""
        chunk_size = 16 * 1024
        with open(self.path, 'rb') as stream:
            data = stream.read(chunk_size)
            while data:
                yield data
                data = stream.read(chunk_size)