diff --git a/vcsserver/remote/git.py b/vcsserver/remote/git.py --- a/vcsserver/remote/git.py +++ b/vcsserver/remote/git.py @@ -18,7 +18,6 @@ import collections import logging import os -import posixpath as vcspath import re import stat import traceback @@ -32,7 +31,7 @@ import pygit2 from pygit2 import Repository as LibGit2Repo from pygit2 import index as LibGit2Index from dulwich import index, objects -from dulwich.client import HttpGitClient, LocalGitClient +from dulwich.client import HttpGitClient, LocalGitClient, FetchPackResult from dulwich.errors import ( NotGitRepository, ChecksumMismatch, WrongObjectException, MissingCommitError, ObjectMissing, HangupException, @@ -42,7 +41,7 @@ from dulwich.server import update_server from vcsserver import exceptions, settings, subprocessio from vcsserver.str_utils import safe_str, safe_int, safe_bytes, ascii_bytes -from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, archive_repo, BinaryEnvelope +from vcsserver.base import RepoFactory, obfuscate_qs, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope from vcsserver.hgcompat import ( hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler) from vcsserver.git_lfs.lib import LFSOidStore @@ -127,6 +126,28 @@ class GitFactory(RepoFactory): return self.repo(wire, use_libgit2=True) +def create_signature_from_string(author_str, **kwargs): + """ + Creates a pygit2.Signature object from a string of the format 'Name <email>'. 
+ + :param author_str: String of the format 'Name <email>' + :return: pygit2.Signature object + """ + match = re.match(r'^(.+) <(.+)>$', author_str) + if match is None: + raise ValueError(f"Invalid format: {author_str}") + + name, email = match.groups() + return pygit2.Signature(name, email, **kwargs) + + +def get_obfuscated_url(url_obj): + url_obj.passwd = b'*****' if url_obj.passwd else url_obj.passwd + url_obj.query = obfuscate_qs(url_obj.query) + obfuscated_uri = str(url_obj) + return obfuscated_uri + + class GitRemote(RemoteBase): def __init__(self, factory): @@ -139,6 +160,13 @@ class GitRemote(RemoteBase): "parents": self.parents, "_commit": self.revision, } + self._bulk_file_methods = { + "size": self.get_node_size, + "data": self.get_node_data, + "flags": self.get_node_flags, + "is_binary": self.get_node_is_binary, + "md5": self.md5_hash + } def _wire_to_config(self, wire): if 'config' in wire: @@ -213,11 +241,63 @@ class GitRemote(RemoteBase): return repo.is_bare @reraise_safe_exceptions + def get_node_data(self, wire, commit_id, path): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[commit_id] + blob_obj = commit.tree[path] + + if blob_obj.type != pygit2.GIT_OBJ_BLOB: + raise exceptions.LookupException()( + f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}') + + return BytesEnvelope(blob_obj.data) + + @reraise_safe_exceptions + def get_node_size(self, wire, commit_id, path): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[commit_id] + blob_obj = commit.tree[path] + + if blob_obj.type != pygit2.GIT_OBJ_BLOB: + raise exceptions.LookupException()( + f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}') + + return blob_obj.size + + @reraise_safe_exceptions + def get_node_flags(self, wire, commit_id, path): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[commit_id] + blob_obj = commit.tree[path] + + if 
blob_obj.type != pygit2.GIT_OBJ_BLOB: + raise exceptions.LookupException()( + f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}') + + return blob_obj.filemode + + @reraise_safe_exceptions + def get_node_is_binary(self, wire, commit_id, path): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[commit_id] + blob_obj = commit.tree[path] + + if blob_obj.type != pygit2.GIT_OBJ_BLOB: + raise exceptions.LookupException()( + f'Tree for commit_id:{commit_id} is not a blob: {blob_obj.type_str}') + + return blob_obj.is_binary + + @reraise_safe_exceptions def blob_as_pretty_string(self, wire, sha): repo_init = self._factory.repo_libgit2(wire) with repo_init as repo: blob_obj = repo[sha] - return BinaryEnvelope(blob_obj.data) + return BytesEnvelope(blob_obj.data) @reraise_safe_exceptions def blob_raw_length(self, wire, sha): @@ -283,15 +363,24 @@ class GitRemote(RemoteBase): return _is_binary(repo_id, tree_id) @reraise_safe_exceptions - def md5_hash(self, wire, tree_id): + def md5_hash(self, wire, commit_id, path): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) - def _md5_hash(_repo_id, _tree_id): - return '' + def _md5_hash(_repo_id, _commit_id, _path): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + commit = repo[_commit_id] + blob_obj = commit.tree[_path] - return _md5_hash(repo_id, tree_id) + if blob_obj.type != pygit2.GIT_OBJ_BLOB: + raise exceptions.LookupException()( + f'Tree for commit_id:{_commit_id} is not a blob: {blob_obj.type_str}') + + return '' + + return _md5_hash(repo_id, commit_id, path) @reraise_safe_exceptions def in_largefiles_store(self, wire, oid): @@ -343,10 +432,29 @@ class GitRemote(RemoteBase): return _bulk_request(repo_id, rev, sorted(pre_load)) - def _build_opener(self, url): + @reraise_safe_exceptions + def bulk_file_request(self, wire, commit_id, path, pre_load): + 
cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) + + @region.conditional_cache_on_arguments(condition=cache_on) + def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load): + result = {} + for attr in pre_load: + try: + method = self._bulk_file_methods[attr] + wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache + result[attr] = method(wire, _commit_id, _path) + except KeyError as e: + raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"') + return BinaryEnvelope(result) + + return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load)) + + def _build_opener(self, url: str): handlers = [] - url_obj = url_parser(url) - _, authinfo = url_obj.authinfo() + url_obj = url_parser(safe_bytes(url)) + authinfo = url_obj.authinfo()[1] if authinfo: # create a password manager @@ -358,27 +466,19 @@ class GitRemote(RemoteBase): return urllib.request.build_opener(*handlers) - def _type_id_to_name(self, type_id: int): - return { - 1: 'commit', - 2: 'tree', - 3: 'blob', - 4: 'tag' - }[type_id] - @reraise_safe_exceptions def check_url(self, url, config): url_obj = url_parser(safe_bytes(url)) - test_uri, _ = url_obj.authinfo() - url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd - url_obj.query = obfuscate_qs(url_obj.query) - cleaned_uri = str(url_obj) - log.info("Checking URL for remote cloning/import: %s", cleaned_uri) + + test_uri = safe_str(url_obj.authinfo()[0]) + obfuscated_uri = get_obfuscated_url(url_obj) + + log.info("Checking URL for remote cloning/import: %s", obfuscated_uri) if not test_uri.endswith('info/refs'): test_uri = test_uri.rstrip('/') + '/info/refs' - o = self._build_opener(url) + o = self._build_opener(test_uri) o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git q = {"service": 'git-upload-pack'} @@ -387,25 +487,28 @@ class GitRemote(RemoteBase): req = urllib.request.Request(cu, None, {}) try: - log.debug("Trying to open URL %s", cleaned_uri) + 
log.debug("Trying to open URL %s", obfuscated_uri) resp = o.open(req) if resp.code != 200: raise exceptions.URLError()('Return Code is not 200') except Exception as e: - log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True) + log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True) # means it cannot be cloned - raise exceptions.URLError(e)("[{}] org_exc: {}".format(cleaned_uri, e)) + raise exceptions.URLError(e)("[{}] org_exc: {}".format(obfuscated_uri, e)) # now detect if it's proper git repo - gitdata = resp.read() - if 'service=git-upload-pack' in gitdata: + gitdata: bytes = resp.read() + + if b'service=git-upload-pack' in gitdata: pass - elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata): + elif re.findall(br'[0-9a-fA-F]{40}\s+refs', gitdata): # old style git can return some other format ! pass else: - raise exceptions.URLError()( - "url [{}] does not look like an git".format(cleaned_uri)) + e = None + raise exceptions.URLError(e)( + "url [%s] does not look like an hg repo org_exc: %s" + % (obfuscated_uri, e)) return True @@ -468,157 +571,112 @@ class GitRemote(RemoteBase): repo.object_store.add_object(blob) return blob.id - # TODO: this is quite complex, check if that can be simplified + @reraise_safe_exceptions + def create_commit(self, wire, author, committer, message, branch, new_tree_id, date_args: list[int, int] = None): + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + + if date_args: + current_time, offset = date_args + + kw = { + 'time': current_time, + 'offset': offset + } + author = create_signature_from_string(author, **kw) + committer = create_signature_from_string(committer, **kw) + + tree = new_tree_id + if isinstance(tree, (bytes, str)): + # validate this tree is in the repo... 
+ tree = repo[safe_str(tree)].id + + parents = [] + # ensure we COMMIT on top of given branch head + # check if this repo has ANY branches, otherwise it's a new branch case we need to make + if branch in repo.branches.local: + parents += [repo.branches[branch].target] + elif [x for x in repo.branches.local]: + parents += [repo.head.target] + #else: + # in case we want to commit on new branch we create it on top of HEAD + #repo.branches.local.create(branch, repo.revparse_single('HEAD')) + + # # Create a new commit + commit_oid = repo.create_commit( + f'refs/heads/{branch}', # the name of the reference to update + author, # the author of the commit + committer, # the committer of the commit + message, # the commit message + tree, # the tree produced by the index + parents # list of parents for the new commit, usually just one, + ) + + new_commit_id = safe_str(commit_oid) + + return new_commit_id + @reraise_safe_exceptions def commit(self, wire, commit_data, branch, commit_tree, updated, removed): - # Defines the root tree - class _Root(object): - def __repr__(self): - return 'ROOT TREE' - ROOT = _Root() - repo = self._factory.repo(wire) - object_store = repo.object_store - - # Create tree and populates it with blobs - if commit_tree: - commit_tree = safe_bytes(commit_tree) - - if commit_tree and repo[commit_tree]: - git_commit = repo[safe_bytes(commit_data['parents'][0])] - commit_tree = repo[git_commit.tree] # root tree - else: - commit_tree = objects.Tree() - - for node in updated: - # Compute subdirs if needed - dirpath, nodename = vcspath.split(node['path']) - dirnames = list(map(safe_str, dirpath and dirpath.split('/') or [])) - parent = commit_tree - ancestors = [('', parent)] + def mode2pygit(mode): + """ + git only supports two filemode 644 and 755 - # Tries to dig for the deepest existing tree - while dirnames: - curdir = dirnames.pop(0) - try: - dir_id = parent[curdir][1] - except KeyError: - # put curdir back into dirnames and stops - dirnames.insert(0, 
curdir) - break - else: - # If found, updates parent - parent = repo[dir_id] - ancestors.append((curdir, parent)) - # Now parent is deepest existing tree and we need to create - # subtrees for dirnames (in reverse order) - # [this only applies for nodes from added] - new_trees = [] + 0o100755 -> 33261 + 0o100644 -> 33188 + """ + return { + 0o100644: pygit2.GIT_FILEMODE_BLOB, + 0o100755: pygit2.GIT_FILEMODE_BLOB_EXECUTABLE, + 0o120000: pygit2.GIT_FILEMODE_LINK + }.get(mode) or pygit2.GIT_FILEMODE_BLOB - blob = objects.Blob.from_string(node['content']) - - node_path = safe_bytes(node['node_path']) + repo_init = self._factory.repo_libgit2(wire) + with repo_init as repo: + repo_index = repo.index - if dirnames: - # If there are trees which should be created we need to build - # them now (in reverse order) - reversed_dirnames = list(reversed(dirnames)) - curtree = objects.Tree() - curtree[node_path] = node['mode'], blob.id - new_trees.append(curtree) - for dirname in reversed_dirnames[:-1]: - newtree = objects.Tree() - newtree[dirname] = (DIR_STAT, curtree.id) - new_trees.append(newtree) - curtree = newtree - parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id) - else: - parent.add(name=node_path, mode=node['mode'], hexsha=blob.id) + for pathspec in updated: + blob_id = repo.create_blob(pathspec['content']) + ie = pygit2.IndexEntry(pathspec['path'], blob_id, mode2pygit(pathspec['mode'])) + repo_index.add(ie) - new_trees.append(parent) - # Update ancestors - reversed_ancestors = reversed( - [(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])]) - for parent, tree, path in reversed_ancestors: - parent[path] = (DIR_STAT, tree.id) - object_store.add_object(tree) + for pathspec in removed: + repo_index.remove(pathspec) - object_store.add_object(blob) - for tree in new_trees: - object_store.add_object(tree) + # Write changes to the index + repo_index.write() + + # Create a tree from the updated index + commit_tree = repo_index.write_tree() + + new_tree_id = 
commit_tree - for node_path in removed: - paths = node_path.split('/') - tree = commit_tree # start with top-level - trees = [{'tree': tree, 'path': ROOT}] - # Traverse deep into the forest... - # resolve final tree by iterating the path. - # e.g a/b/c.txt will get - # - root as tree then - # - 'a' as tree, - # - 'b' as tree, - # - stop at c as blob. - for path in paths: - try: - obj = repo[tree[path][1]] - if isinstance(obj, objects.Tree): - trees.append({'tree': obj, 'path': path}) - tree = obj - except KeyError: - break - #PROBLEM: - """ - We're not editing same reference tree object - """ - # Cut down the blob and all rotten trees on the way back... - for path, tree_data in reversed(list(zip(paths, trees))): - tree = tree_data['tree'] - tree.__delitem__(path) - # This operation edits the tree, we need to mark new commit back + author = commit_data['author'] + committer = commit_data['committer'] + message = commit_data['message'] + + date_args = [int(commit_data['commit_time']), int(commit_data['commit_timezone'])] - if len(tree) > 0: - # This tree still has elements - don't remove it or any - # of it's parents - break - - object_store.add_object(commit_tree) + new_commit_id = self.create_commit(wire, author, committer, message, branch, + new_tree_id, date_args=date_args) - # Create commit - commit = objects.Commit() - commit.tree = commit_tree.id - bytes_keys = [ - 'author', - 'committer', - 'message', - 'encoding', - 'parents' - ] + # libgit2, ensure the branch is there and exists + self.create_branch(wire, branch, new_commit_id) - for k, v in commit_data.items(): - if k in bytes_keys: - if k == 'parents': - v = [safe_bytes(x) for x in v] - else: - v = safe_bytes(v) - setattr(commit, k, v) + # libgit2, set new ref to this created commit + self.set_refs(wire, f'refs/heads/{branch}', new_commit_id) - object_store.add_object(commit) - - self.create_branch(wire, branch, safe_str(commit.id)) - - # dulwich set-ref - repo.refs[safe_bytes(f'refs/heads/{branch}')] = 
commit.id - - return commit.id + return new_commit_id @reraise_safe_exceptions def pull(self, wire, url, apply_refs=True, refs=None, update_after=False): if url != 'default' and '://' not in url: client = LocalGitClient(url) else: - url_obj = url_parser(url) + url_obj = url_parser(safe_bytes(url)) o = self._build_opener(url) - url, _ = url_obj.authinfo() + url = url_obj.authinfo()[0] client = HttpGitClient(base_url=url, opener=o) repo = self._factory.repo(wire) @@ -674,6 +732,9 @@ class GitRemote(RemoteBase): repo[HEAD_MARKER] = remote_refs[HEAD_MARKER] index.build_index_from_tree(repo.path, repo.index_path(), repo.object_store, repo[HEAD_MARKER].tree) + + if isinstance(remote_refs, FetchPackResult): + return remote_refs.refs return remote_refs @reraise_safe_exceptions @@ -759,11 +820,11 @@ class GitRemote(RemoteBase): wire_remote = wire.copy() wire_remote['path'] = path2 repo_remote = self._factory.repo(wire_remote) - LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote) + LocalGitClient(thin_packs=False).fetch(path2, repo_remote) revs = [ x.commit.id - for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])] + for x in repo_remote.get_walker(include=[safe_bytes(rev2)], exclude=[safe_bytes(rev1)])] return revs @reraise_safe_exceptions @@ -815,11 +876,11 @@ class GitRemote(RemoteBase): raise exceptions.LookupException(e)(missing_commit_err) commit_id = commit.hex - type_id = commit.type + type_str = commit.type_str return { 'id': commit_id, - 'type': self._type_id_to_name(type_id), + 'type': type_str, 'commit_id': commit_id, 'idx': 0 } @@ -1018,7 +1079,11 @@ class GitRemote(RemoteBase): def create_branch(self, wire, branch_name, commit_id, force=False): repo_init = self._factory.repo_libgit2(wire) with repo_init as repo: - commit = repo[commit_id] + if commit_id: + commit = repo[commit_id] + else: + # if commit is not given just use the HEAD + commit = repo.head() if force: repo.branches.local.create(branch_name, commit, force=force) @@ -1041,12 
+1106,27 @@ class GitRemote(RemoteBase): @reraise_safe_exceptions def tree_changes(self, wire, source_id, target_id): - # TODO(marcink): remove this seems it's only used by tests repo = self._factory.repo(wire) + # source can be empty + source_id = safe_bytes(source_id if source_id else b'') + target_id = safe_bytes(target_id) + source = repo[source_id].tree if source_id else None target = repo[target_id].tree result = repo.object_store.tree_changes(source, target) - return list(result) + + added = set() + modified = set() + deleted = set() + for (old_path, new_path), (_, _), (_, _) in list(result): + if new_path and old_path: + modified.add(new_path) + elif new_path and not old_path: + added.add(new_path) + elif not new_path and old_path: + deleted.add(old_path) + + return list(added), list(modified), list(deleted) @reraise_safe_exceptions def tree_and_type_for_path(self, wire, commit_id, path): @@ -1167,10 +1247,11 @@ class GitRemote(RemoteBase): if file_filter: for p in diff_obj: if p.delta.old_file.path == file_filter: - return BinaryEnvelope(p.data) or BinaryEnvelope(b'') + return BytesEnvelope(p.data) or BytesEnvelope(b'') # fo matching path == no diff - return BinaryEnvelope(b'') - return BinaryEnvelope(diff_obj.patch) or BinaryEnvelope(b'') + return BytesEnvelope(b'') + + return BytesEnvelope(safe_bytes(diff_obj.patch)) or BytesEnvelope(b'') @reraise_safe_exceptions def node_history(self, wire, commit_id, path, limit): @@ -1346,8 +1427,8 @@ class GitRemote(RemoteBase): return [head_name] + [f'set HEAD to refs/heads/{head_name}'] @reraise_safe_exceptions - def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id): + def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path, + archive_dir_name, commit_id, cache_config): def file_walker(_commit_id, path): repo_init = self._factory.repo_libgit2(wire) @@ -1378,5 +1459,5 @@ class GitRemote(RemoteBase): continue yield ArchiveNode(file_path, mode, 
is_link, repo[file_node.hex].read_raw) - return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id) + return store_archive_in_cache( + file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config) diff --git a/vcsserver/remote/hg.py b/vcsserver/remote/hg.py --- a/vcsserver/remote/hg.py +++ b/vcsserver/remote/hg.py @@ -32,7 +32,8 @@ from mercurial import repair import vcsserver from vcsserver import exceptions -from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, archive_repo, ArchiveNode, BinaryEnvelope +from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original, store_archive_in_cache, ArchiveNode, BytesEnvelope, \ + BinaryEnvelope from vcsserver.hgcompat import ( archival, bin, clone, config as hgconfig, diffopts, hex, get_ctx, hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler, @@ -42,6 +43,8 @@ from vcsserver.hgcompat import ( alwaysmatcher, patternmatcher, hgutil, hgext_strip) from vcsserver.str_utils import ascii_bytes, ascii_str, safe_str, safe_bytes from vcsserver.vcs_base import RemoteBase +from vcsserver.config import hooks as hooks_config + log = logging.getLogger(__name__) @@ -137,9 +140,18 @@ class MercurialFactory(RepoFactory): def _create_config(self, config, hooks=True): if not hooks: - hooks_to_clean = frozenset(( - 'changegroup.repo_size', 'preoutgoing.pre_pull', - 'outgoing.pull_logger', 'prechangegroup.pre_push')) + + hooks_to_clean = { + + hooks_config.HOOK_REPO_SIZE, + hooks_config.HOOK_PRE_PULL, + hooks_config.HOOK_PULL, + + hooks_config.HOOK_PRE_PUSH, + # TODO: what about PRETXT, this was disabled in pre 5.0.0 + hooks_config.HOOK_PRETX_PUSH, + + } new_config = [] for section, option, value in config: if section == 'hooks' and option in hooks_to_clean: @@ -178,6 +190,22 @@ def patch_ui_message_output(baseui): return baseui, output +def get_obfuscated_url(url_obj): + url_obj.passwd = 
b'*****' if url_obj.passwd else url_obj.passwd + url_obj.query = obfuscate_qs(url_obj.query) + obfuscated_uri = str(url_obj) + return obfuscated_uri + + +def normalize_url_for_hg(url: str): + _proto = None + + if '+' in url[:url.find('://')]: + _proto = url[0:url.find('+')] + url = url[url.find('+') + 1:] + return url, _proto + + class HgRemote(RemoteBase): def __init__(self, factory): @@ -196,6 +224,13 @@ class HgRemote(RemoteBase): "hidden": self.ctx_hidden, "_file_paths": self.ctx_list, } + self._bulk_file_methods = { + "size": self.fctx_size, + "data": self.fctx_node_data, + "flags": self.fctx_flags, + "is_binary": self.is_binary, + "md5": self.md5_hash, + } def _get_ctx(self, repo, ref): return get_ctx(repo, ref) @@ -405,19 +440,15 @@ class HgRemote(RemoteBase): @reraise_safe_exceptions def check_url(self, url, config): - _proto = None - if '+' in url[:url.find('://')]: - _proto = url[0:url.find('+')] - url = url[url.find('+') + 1:] + url, _proto = normalize_url_for_hg(url) + url_obj = url_parser(safe_bytes(url)) + + test_uri = safe_str(url_obj.authinfo()[0]) + authinfo = url_obj.authinfo()[1] + obfuscated_uri = get_obfuscated_url(url_obj) + log.info("Checking URL for remote cloning/import: %s", obfuscated_uri) + handlers = [] - url_obj = url_parser(url) - test_uri, authinfo = url_obj.authinfo() - url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd - url_obj.query = obfuscate_qs(url_obj.query) - - cleaned_uri = str(url_obj) - log.info("Checking URL for remote cloning/import: %s", cleaned_uri) - if authinfo: # create a password manager passmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() @@ -437,14 +468,14 @@ class HgRemote(RemoteBase): req = urllib.request.Request(cu, None, {}) try: - log.debug("Trying to open URL %s", cleaned_uri) + log.debug("Trying to open URL %s", obfuscated_uri) resp = o.open(req) if resp.code != 200: raise exceptions.URLError()('Return Code is not 200') except Exception as e: - log.warning("URL cannot be opened: %s", 
cleaned_uri, exc_info=True) + log.warning("URL cannot be opened: %s", obfuscated_uri, exc_info=True) # means it cannot be cloned - raise exceptions.URLError(e)("[{}] org_exc: {}".format(cleaned_uri, e)) + raise exceptions.URLError(e)("[{}] org_exc: {}".format(obfuscated_uri, e)) # now check if it's a proper hg repo, but don't do it for svn try: @@ -453,19 +484,18 @@ class HgRemote(RemoteBase): else: # check for pure hg repos log.debug( - "Verifying if URL is a Mercurial repository: %s", - cleaned_uri) + "Verifying if URL is a Mercurial repository: %s", obfuscated_uri) ui = make_ui_from_config(config) - peer_checker = makepeer(ui, url) - peer_checker.lookup('tip') + peer_checker = makepeer(ui, safe_bytes(url)) + peer_checker.lookup(b'tip') except Exception as e: log.warning("URL is not a valid Mercurial repository: %s", - cleaned_uri) + obfuscated_uri) raise exceptions.URLError(e)( "url [%s] does not look like an hg repo org_exc: %s" - % (cleaned_uri, e)) + % (obfuscated_uri, e)) - log.info("URL is a valid Mercurial repository: %s", cleaned_uri) + log.info("URL is a valid Mercurial repository: %s", obfuscated_uri) return True @reraise_safe_exceptions @@ -483,7 +513,7 @@ class HgRemote(RemoteBase): try: diff_iter = patch.diff( repo, node1=commit_id_1, node2=commit_id_2, match=match_filter, opts=opts) - return BinaryEnvelope(b"".join(diff_iter)) + return BytesEnvelope(b"".join(diff_iter)) except RepoLookupError as e: raise exceptions.LookupException(e)() @@ -539,6 +569,25 @@ class HgRemote(RemoteBase): return _node_history_until(context_uid, repo_id, revision, path, limit) @reraise_safe_exceptions + def bulk_file_request(self, wire, commit_id, path, pre_load): + cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) + + @region.conditional_cache_on_arguments(condition=cache_on) + def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load): + result = {} + for attr in pre_load: + try: + method = self._bulk_file_methods[attr] + 
wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache + result[attr] = method(wire, _commit_id, _path) + except KeyError as e: + raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"') + return BinaryEnvelope(result) + + return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load)) + + @reraise_safe_exceptions def fctx_annotate(self, wire, revision, path): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, revision) @@ -557,7 +606,7 @@ class HgRemote(RemoteBase): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, revision) fctx = ctx.filectx(safe_bytes(path)) - return BinaryEnvelope(fctx.data()) + return BytesEnvelope(fctx.data()) @reraise_safe_exceptions def fctx_flags(self, wire, commit_id, path): @@ -674,7 +723,6 @@ class HgRemote(RemoteBase): @region.conditional_cache_on_arguments(condition=cache_on) def _lookup(_context_uid, _repo_id, _revision, _both): - repo = self._factory.repo(wire) rev = _revision if isinstance(rev, int): @@ -949,35 +997,38 @@ class HgRemote(RemoteBase): # Mercurial internally has a lot of logic that checks ONLY if # option is defined, we just pass those if they are defined then opts = {} + if bookmark: - if isinstance(branch, list): - bookmark = [safe_bytes(x) for x in bookmark] - else: - bookmark = safe_bytes(bookmark) - opts['bookmark'] = bookmark + opts['bookmark'] = [safe_bytes(x) for x in bookmark] \ + if isinstance(bookmark, list) else safe_bytes(bookmark) + if branch: - if isinstance(branch, list): - branch = [safe_bytes(x) for x in branch] - else: - branch = safe_bytes(branch) - opts['branch'] = branch + opts['branch'] = [safe_bytes(x) for x in branch] \ + if isinstance(branch, list) else safe_bytes(branch) + if revision: - opts['rev'] = safe_bytes(revision) + opts['rev'] = [safe_bytes(x) for x in revision] \ + if isinstance(revision, list) else safe_bytes(revision) commands.pull(baseui, repo, source, **opts) @reraise_safe_exceptions - def push(self, wire, 
revisions, dest_path, hooks=True, push_branches=False): + def push(self, wire, revisions, dest_path, hooks: bool = True, push_branches: bool = False): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config'], hooks=hooks) - commands.push(baseui, repo, dest=dest_path, rev=revisions, + + revisions = [safe_bytes(x) for x in revisions] \ + if isinstance(revisions, list) else safe_bytes(revisions) + + commands.push(baseui, repo, safe_bytes(dest_path), + rev=revisions, new_branch=push_branches) @reraise_safe_exceptions def strip(self, wire, revision, update, backup): repo = self._factory.repo(wire) ctx = self._get_ctx(repo, revision) - hgext_strip( + hgext_strip.strip( repo.baseui, repo, ctx.node(), update=update, backup=backup) @reraise_safe_exceptions @@ -1008,7 +1059,7 @@ class HgRemote(RemoteBase): # setting the interactive flag to `False` mercurial doesn't prompt the # used but instead uses a default value. repo.ui.setconfig(b'ui', b'interactive', False) - commands.merge(baseui, repo, rev=revision) + commands.merge(baseui, repo, rev=safe_bytes(revision)) @reraise_safe_exceptions def merge_state(self, wire): @@ -1027,11 +1078,11 @@ class HgRemote(RemoteBase): def commit(self, wire, message, username, close_branch=False): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) - repo.ui.setconfig(b'ui', b'username', username) - commands.commit(baseui, repo, message=message, close_branch=close_branch) + repo.ui.setconfig(b'ui', b'username', safe_bytes(username)) + commands.commit(baseui, repo, message=safe_bytes(message), close_branch=close_branch) @reraise_safe_exceptions - def rebase(self, wire, source=None, dest=None, abort=False): + def rebase(self, wire, source='', dest='', abort=False): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) repo.ui.setconfig(b'ui', b'merge', b'internal:dump') @@ -1040,7 +1091,9 @@ class HgRemote(RemoteBase): # setting the interactive flag to 
`False` mercurial doesn't prompt the # used but instead uses a default value. repo.ui.setconfig(b'ui', b'interactive', False) - rebase.rebase(baseui, repo, base=source, dest=dest, abort=abort, keep=not abort) + + rebase.rebase(baseui, repo, base=safe_bytes(source or ''), dest=safe_bytes(dest or ''), + abort=abort, keep=not abort) @reraise_safe_exceptions def tag(self, wire, name, revision, message, local, user, tag_time, tag_timezone): @@ -1050,7 +1103,7 @@ class HgRemote(RemoteBase): date = (tag_time, tag_timezone) try: - hg_tag.tag(repo, name, node, message, local, user, date) + hg_tag.tag(repo, safe_bytes(name), node, safe_bytes(message), local, safe_bytes(user), date) except Abort as e: log.exception("Tag operation aborted") # Exception can contain unicode which we convert @@ -1060,6 +1113,7 @@ class HgRemote(RemoteBase): def bookmark(self, wire, bookmark, revision=''): repo = self._factory.repo(wire) baseui = self._factory._create_config(wire['config']) + revision = revision or '' commands.bookmark(baseui, repo, safe_bytes(bookmark), rev=safe_bytes(revision), force=True) @reraise_safe_exceptions @@ -1079,8 +1133,8 @@ class HgRemote(RemoteBase): pass @reraise_safe_exceptions - def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id): + def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path, + archive_dir_name, commit_id, cache_config): def file_walker(_commit_id, path): repo = self._factory.repo(wire) @@ -1100,6 +1154,6 @@ class HgRemote(RemoteBase): yield ArchiveNode(file_path, mode, is_link, ctx[fn].data) - return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id) + return store_archive_in_cache( + file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config) diff --git a/vcsserver/remote/svn.py b/vcsserver/remote/svn.py --- a/vcsserver/remote/svn.py +++ b/vcsserver/remote/svn.py 
@@ -37,9 +37,10 @@ import svn.fs # noqa import svn.repos # noqa from vcsserver import svn_diff, exceptions, subprocessio, settings -from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, archive_repo, BinaryEnvelope +from vcsserver.base import RepoFactory, raise_from_original, ArchiveNode, store_archive_in_cache, BytesEnvelope, BinaryEnvelope from vcsserver.exceptions import NoContentException from vcsserver.str_utils import safe_str, safe_bytes +from vcsserver.type_utils import assert_bytes from vcsserver.vcs_base import RemoteBase from vcsserver.lib.svnremoterepo import svnremoterepo log = logging.getLogger(__name__) @@ -109,6 +110,39 @@ class SvnRemote(RemoteBase): def __init__(self, factory, hg_factory=None): self._factory = factory + self._bulk_methods = { + # NOT supported in SVN ATM... + } + self._bulk_file_methods = { + "size": self.get_file_size, + "data": self.get_file_content, + "flags": self.get_node_type, + "is_binary": self.is_binary, + "md5": self.md5_hash + } + + @reraise_safe_exceptions + def bulk_file_request(self, wire, commit_id, path, pre_load): + cache_on, context_uid, repo_id = self._cache_on(wire) + region = self._region(wire) + + # since we use unified API, we need to cast from str to int for SVN + commit_id = int(commit_id) + + @region.conditional_cache_on_arguments(condition=cache_on) + def _bulk_file_request(_repo_id, _commit_id, _path, _pre_load): + result = {} + for attr in pre_load: + try: + method = self._bulk_file_methods[attr] + wire.update({'cache': False}) # disable cache for bulk calls so we don't double cache + result[attr] = method(wire, _commit_id, _path) + except KeyError as e: + raise exceptions.VcsException(e)(f'Unknown bulk attribute: "{attr}"') + return BinaryEnvelope(result) + + return _bulk_file_request(repo_id, commit_id, path, sorted(pre_load)) + @reraise_safe_exceptions def discover_svn_version(self): try: @@ -120,25 +154,23 @@ class SvnRemote(RemoteBase): @reraise_safe_exceptions def 
is_empty(self, wire): - try: return self.lookup(wire, -1) == 0 except Exception: log.exception("failed to read object_store") return False - def check_url(self, url): + def check_url(self, url, config): - # uuid function get's only valid UUID from proper repo, else + # uuid function gets only valid UUID from proper repo, else # throws exception username, password, src_url = self.get_url_and_credentials(url) try: - svnremoterepo(username, password, src_url).svn().uuid + svnremoterepo(safe_bytes(username), safe_bytes(password), safe_bytes(src_url)).svn().uuid except Exception: tb = traceback.format_exc() log.debug("Invalid Subversion url: `%s`, tb: %s", url, tb) - raise URLError( - '"{}" is not a valid Subversion source url.'.format(url)) + raise URLError(f'"{url}" is not a valid Subversion source url.') return True def is_path_valid_repository(self, wire, path): @@ -169,6 +201,7 @@ class SvnRemote(RemoteBase): stdout, stderr = subprocessio.run_command(cmd) return stdout + @reraise_safe_exceptions def lookup(self, wire, revision): if revision not in [-1, None, 'HEAD']: raise NotImplementedError @@ -177,6 +210,7 @@ class SvnRemote(RemoteBase): head = svn.fs.youngest_rev(fs_ptr) return head + @reraise_safe_exceptions def lookup_interval(self, wire, start_ts, end_ts): repo = self._factory.repo(wire) fsobj = svn.repos.fs(repo) @@ -194,10 +228,12 @@ class SvnRemote(RemoteBase): end_rev = svn.fs.youngest_rev(fsobj) return start_rev, end_rev + @reraise_safe_exceptions def revision_properties(self, wire, revision): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) def _revision_properties(_repo_id, _revision): repo = self._factory.repo(wire) @@ -253,6 +289,7 @@ class SvnRemote(RemoteBase): def node_history(self, wire, path, revision, limit): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) + @region.conditional_cache_on_arguments(condition=cache_on) 
def _assert_correct_path(_context_uid, _repo_id, _path, _revision, _limit): cross_copies = False @@ -272,6 +309,7 @@ class SvnRemote(RemoteBase): return history_revisions return _assert_correct_path(context_uid, repo_id, path, revision, limit) + @reraise_safe_exceptions def node_properties(self, wire, path, revision): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) @@ -311,13 +349,14 @@ class SvnRemote(RemoteBase): return annotations - def get_node_type(self, wire, path, revision=None): + @reraise_safe_exceptions + def get_node_type(self, wire, revision=None, path=''): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) - def _get_node_type(_repo_id, _path, _revision): + def _get_node_type(_repo_id, _revision, _path): repo = self._factory.repo(wire) fs_ptr = svn.repos.fs(repo) if _revision is None: @@ -325,9 +364,10 @@ class SvnRemote(RemoteBase): root = svn.fs.revision_root(fs_ptr, _revision) node = svn.fs.check_path(root, path) return NODE_TYPE_MAPPING.get(node, None) - return _get_node_type(repo_id, path, revision) + return _get_node_type(repo_id, revision, path) - def get_nodes(self, wire, path, revision=None): + @reraise_safe_exceptions + def get_nodes(self, wire, revision=None, path=''): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) @@ -347,22 +387,26 @@ class SvnRemote(RemoteBase): return result return _get_nodes(repo_id, path, revision) - def get_file_content(self, wire, path, rev=None): + @reraise_safe_exceptions + def get_file_content(self, wire, rev=None, path=''): repo = self._factory.repo(wire) fsobj = svn.repos.fs(repo) + if rev is None: - rev = svn.fs.youngest_revision(fsobj) + rev = svn.fs.youngest_rev(fsobj) + root = svn.fs.revision_root(fsobj, rev) content = svn.core.Stream(svn.fs.file_contents(root, path)) - return BinaryEnvelope(content.read()) + return BytesEnvelope(content.read()) - def 
get_file_size(self, wire, path, revision=None): + @reraise_safe_exceptions + def get_file_size(self, wire, revision=None, path=''): cache_on, context_uid, repo_id = self._cache_on(wire) region = self._region(wire) @region.conditional_cache_on_arguments(condition=cache_on) - def _get_file_size(_repo_id, _path, _revision): + def _get_file_size(_repo_id, _revision, _path): repo = self._factory.repo(wire) fsobj = svn.repos.fs(repo) if _revision is None: @@ -370,17 +414,17 @@ class SvnRemote(RemoteBase): root = svn.fs.revision_root(fsobj, _revision) size = svn.fs.file_length(root, path) return size - return _get_file_size(repo_id, path, revision) + return _get_file_size(repo_id, revision, path) def create_repository(self, wire, compatible_version=None): log.info('Creating Subversion repository in path "%s"', wire['path']) self._factory.repo(wire, create=True, compatible_version=compatible_version) - def get_url_and_credentials(self, src_url): + def get_url_and_credentials(self, src_url) -> tuple[str, str, str]: obj = urllib.parse.urlparse(src_url) - username = obj.username or None - password = obj.password or None + username = obj.username or '' + password = obj.password or '' return username, password, src_url def import_remote_repository(self, wire, src_url): @@ -430,8 +474,6 @@ class SvnRemote(RemoteBase): def commit(self, wire, message, author, timestamp, updated, removed): - updated = [{k: safe_bytes(v) for k, v in x.items() if isinstance(v, str)} for x in updated] - message = safe_bytes(message) author = safe_bytes(author) @@ -450,13 +492,14 @@ class SvnRemote(RemoteBase): commit_id = svn.repos.fs_commit_txn(repo, txn) if timestamp: - apr_time = int(apr_time_t(timestamp)) + apr_time = apr_time_t(timestamp) ts_formatted = svn.core.svn_time_to_cstring(apr_time) svn.fs.change_rev_prop(fsobj, commit_id, 'svn:date', ts_formatted) log.debug('Committed revision "%s" to "%s".', commit_id, wire['path']) return commit_id + @reraise_safe_exceptions def diff(self, wire, rev1, 
rev2, path1=None, path2=None, ignore_whitespace=False, context=3): @@ -465,12 +508,12 @@ class SvnRemote(RemoteBase): diff_creator = SvnDiffer( repo, rev1, path1, rev2, path2, ignore_whitespace, context) try: - return BinaryEnvelope(diff_creator.generate_diff()) + return BytesEnvelope(diff_creator.generate_diff()) except svn.core.SubversionException as e: log.exception( "Error during diff operation operation. " "Path might not exist %s, %s", path1, path2) - return BinaryEnvelope(b'') + return BytesEnvelope(b'') @reraise_safe_exceptions def is_large_file(self, wire, path): @@ -483,8 +526,10 @@ class SvnRemote(RemoteBase): @region.conditional_cache_on_arguments(condition=cache_on) def _is_binary(_repo_id, _rev, _path): - raw_bytes = self.get_file_content(wire, path, rev) - return raw_bytes and b'\0' in raw_bytes + raw_bytes = self.get_file_content(wire, rev, path) + if not raw_bytes: + return False + return b'\0' in raw_bytes return _is_binary(repo_id, rev, path) @@ -555,8 +600,8 @@ class SvnRemote(RemoteBase): pass @reraise_safe_exceptions - def archive_repo(self, wire, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id): + def archive_repo(self, wire, archive_name_key, kind, mtime, archive_at_path, + archive_dir_name, commit_id, cache_config): def walk_tree(root, root_dir, _commit_id): """ @@ -616,8 +661,8 @@ class SvnRemote(RemoteBase): data_stream = f_data['content_stream'] yield ArchiveNode(file_path, mode, is_link, data_stream) - return archive_repo(file_walker, archive_dest_path, kind, mtime, archive_at_path, - archive_dir_name, commit_id) + return store_archive_in_cache( + file_walker, archive_name_key, kind, mtime, archive_at_path, archive_dir_name, commit_id, cache_config=cache_config) class SvnDiffer(object): @@ -658,15 +703,15 @@ class SvnDiffer(object): "Source type: %s, target type: %s" % (self.src_kind, self.tgt_kind)) - def generate_diff(self): - buf = io.StringIO() + def generate_diff(self) -> bytes: + buf = io.BytesIO() 
if self.tgt_kind == svn.core.svn_node_dir: self._generate_dir_diff(buf) else: self._generate_file_diff(buf) return buf.getvalue() - def _generate_dir_diff(self, buf): + def _generate_dir_diff(self, buf: io.BytesIO): editor = DiffChangeEditor() editor_ptr, editor_baton = svn.delta.make_editor(editor) svn.repos.dir_delta2( @@ -687,7 +732,7 @@ class SvnDiffer(object): self._generate_node_diff( buf, change, path, self.tgt_path, path, self.src_path) - def _generate_file_diff(self, buf): + def _generate_file_diff(self, buf: io.BytesIO): change = None if self.src_kind == svn.core.svn_node_none: change = "add" @@ -699,13 +744,14 @@ class SvnDiffer(object): buf, change, tgt_path, tgt_base, src_path, src_base) def _generate_node_diff( - self, buf, change, tgt_path, tgt_base, src_path, src_base): - + self, buf: io.BytesIO, change, tgt_path, tgt_base, src_path, src_base): + tgt_path_bytes = safe_bytes(tgt_path) tgt_path = safe_str(tgt_path) + + src_path_bytes = safe_bytes(src_path) src_path = safe_str(src_path) - if self.src_rev == self.tgt_rev and tgt_base == src_base: # makes consistent behaviour with git/hg to return empty diff if # we compare same revisions @@ -717,46 +763,45 @@ class SvnDiffer(object): self.binary_content = False mime_type = self._get_mime_type(tgt_full_path) - if mime_type and not mime_type.startswith('text'): + if mime_type and not mime_type.startswith(b'text'): self.binary_content = True - buf.write("=" * 67 + '\n') - buf.write("Cannot display: file marked as a binary type.\n") - buf.write("svn:mime-type = %s\n" % mime_type) - buf.write("Index: {}\n".format(tgt_path)) - buf.write("=" * 67 + '\n') - buf.write("diff --git a/{tgt_path} b/{tgt_path}\n".format( - tgt_path=tgt_path)) + buf.write(b"=" * 67 + b'\n') + buf.write(b"Cannot display: file marked as a binary type.\n") + buf.write(b"svn:mime-type = %s\n" % mime_type) + buf.write(b"Index: %b\n" % tgt_path_bytes) + buf.write(b"=" * 67 + b'\n') + buf.write(b"diff --git a/%b b/%b\n" % (tgt_path_bytes, 
tgt_path_bytes)) if change == 'add': # TODO: johbo: SVN is missing a zero here compared to git - buf.write("new file mode 10644\n") + buf.write(b"new file mode 10644\n") + + # TODO(marcink): intro to binary detection of svn patches + # if self.binary_content: + # buf.write(b'GIT binary patch\n') - #TODO(marcink): intro to binary detection of svn patches + buf.write(b"--- /dev/null\t(revision 0)\n") + src_lines = [] + else: + if change == 'delete': + buf.write(b"deleted file mode 10644\n") + + # TODO(marcink): intro to binary detection of svn patches # if self.binary_content: # buf.write('GIT binary patch\n') - buf.write("--- /dev/null\t(revision 0)\n") - src_lines = [] - else: - if change == 'delete': - buf.write("deleted file mode 10644\n") - - #TODO(marcink): intro to binary detection of svn patches - # if self.binary_content: - # buf.write('GIT binary patch\n') - - buf.write("--- a/{}\t(revision {})\n".format( - src_path, self.src_rev)) + buf.write(b"--- a/%b\t(revision %d)\n" % (src_path_bytes, self.src_rev)) src_lines = self._svn_readlines(self.src_root, src_full_path) if change == 'delete': - buf.write("+++ /dev/null\t(revision {})\n".format(self.tgt_rev)) + buf.write(b"+++ /dev/null\t(revision %d)\n" % self.tgt_rev) tgt_lines = [] else: - buf.write("+++ b/{}\t(revision {})\n".format( - tgt_path, self.tgt_rev)) + buf.write(b"+++ b/%b\t(revision %d)\n" % (tgt_path_bytes, self.tgt_rev)) tgt_lines = self._svn_readlines(self.tgt_root, tgt_full_path) + # we made our diff header, time to generate the diff content into our buffer + if not self.binary_content: udiff = svn_diff.unified_diff( src_lines, tgt_lines, context=self.context, @@ -766,7 +811,7 @@ class SvnDiffer(object): buf.writelines(udiff) - def _get_mime_type(self, path): + def _get_mime_type(self, path) -> bytes: try: mime_type = svn.fs.node_prop( self.tgt_root, path, svn.core.SVN_PROP_MIME_TYPE) @@ -822,7 +867,7 @@ class TxnNodeProcessor(object): """ def __init__(self, node, txn_root): - assert 
isinstance(node['path'], bytes) + assert_bytes(node['path']) self.node = node self.txn_root = txn_root @@ -858,7 +903,7 @@ class TxnNodeProcessor(object): svn.fs.make_file(self.txn_root, self.node['path']) def _update_file_content(self): - assert isinstance(self.node['content'], bytes) + assert_bytes(self.node['content']) handler, baton = svn.fs.apply_textdelta( self.txn_root, self.node['path'], None, None) @@ -868,14 +913,14 @@ class TxnNodeProcessor(object): properties = self.node.get('properties', {}) for key, value in properties.items(): svn.fs.change_node_prop( - self.txn_root, self.node['path'], key, value) + self.txn_root, self.node['path'], safe_bytes(key), safe_bytes(value)) def apr_time_t(timestamp): """ Convert a Python timestamp into APR timestamp type apr_time_t """ - return timestamp * 1E6 + return int(timestamp * 1E6) def svn_opt_revision_value_t(num): diff --git a/vcsserver/svn_diff.py b/vcsserver/svn_diff.py --- a/vcsserver/svn_diff.py +++ b/vcsserver/svn_diff.py @@ -16,15 +16,15 @@ import difflib -def get_filtered_hunks(fromlines, tolines, context=None, - ignore_blank_lines=False, ignore_case=False, - ignore_space_changes=False): +def get_filtered_hunks(from_lines, to_lines, context=None, + ignore_blank_lines: bool = False, ignore_case: bool = False, + ignore_space_changes: bool = False): """Retrieve differences in the form of `difflib.SequenceMatcher` opcodes, grouped according to the ``context`` and ``ignore_*`` parameters. 
- :param fromlines: list of lines corresponding to the old content - :param tolines: list of lines corresponding to the new content + :param from_lines: list of lines corresponding to the old content + :param to_lines: list of lines corresponding to the new content :param ignore_blank_lines: differences about empty lines only are ignored :param ignore_case: upper case / lower case only differences are ignored :param ignore_space_changes: differences in amount of spaces are ignored @@ -36,27 +36,27 @@ def get_filtered_hunks(fromlines, toline to filter out the results will come straight from the SequenceMatcher. """ - hunks = get_hunks(fromlines, tolines, context) + hunks = get_hunks(from_lines, to_lines, context) if ignore_space_changes or ignore_case or ignore_blank_lines: - hunks = filter_ignorable_lines(hunks, fromlines, tolines, context, + hunks = filter_ignorable_lines(hunks, from_lines, to_lines, context, ignore_blank_lines, ignore_case, ignore_space_changes) return hunks -def get_hunks(fromlines, tolines, context=None): +def get_hunks(from_lines, to_lines, context=None): """Generator yielding grouped opcodes describing differences . See `get_filtered_hunks` for the parameter descriptions. """ - matcher = difflib.SequenceMatcher(None, fromlines, tolines) + matcher = difflib.SequenceMatcher(None, from_lines, to_lines) if context is None: return (hunk for hunk in [matcher.get_opcodes()]) else: return matcher.get_grouped_opcodes(context) -def filter_ignorable_lines(hunks, fromlines, tolines, context, +def filter_ignorable_lines(hunks, from_lines, to_lines, context, ignore_blank_lines, ignore_case, ignore_space_changes): """Detect line changes that should be ignored and emits them as @@ -66,11 +66,12 @@ def filter_ignorable_lines(hunks, fromli See `get_filtered_hunks` for the parameter descriptions. 
""" def is_ignorable(tag, fromlines, tolines): + if tag == 'delete' and ignore_blank_lines: - if ''.join(fromlines) == '': + if b''.join(fromlines) == b'': return True elif tag == 'insert' and ignore_blank_lines: - if ''.join(tolines) == '': + if b''.join(tolines) == b'': return True elif tag == 'replace' and (ignore_case or ignore_space_changes): if len(fromlines) != len(tolines): @@ -80,7 +81,7 @@ def filter_ignorable_lines(hunks, fromli if ignore_case: input_str = input_str.lower() if ignore_space_changes: - input_str = ' '.join(input_str.split()) + input_str = b' '.join(input_str.split()) return input_str for i in range(len(fromlines)): @@ -100,7 +101,7 @@ def filter_ignorable_lines(hunks, fromli else: prev = (tag, i1, i2, j1, j2) else: - if is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]): + if is_ignorable(tag, from_lines[i1:i2], to_lines[j1:j2]): ignored_lines = True if prev: prev = 'equal', prev[1], i2, prev[3], j2 @@ -124,10 +125,11 @@ def filter_ignorable_lines(hunks, fromli nn = n + n group = [] + def all_equal(): all(op[0] == 'equal' for op in group) for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes): - if idx == 0 and tag == 'equal': # Fixup leading unchanged block + if idx == 0 and tag == 'equal': # Fixup leading unchanged block i1, j1 = max(i1, i2 - n), max(j1, j2 - n) elif tag == 'equal' and i2 - i1 > nn: group.append((tag, i1, min(i2, i1 + n), j1, @@ -139,7 +141,7 @@ def filter_ignorable_lines(hunks, fromli group.append((tag, i1, i2, j1, j2)) if group and not (len(group) == 1 and group[0][0] == 'equal'): - if group[-1][0] == 'equal': # Fixup trailing unchanged block + if group[-1][0] == 'equal': # Fixup trailing unchanged block tag, i1, i2, j1, j2 = group[-1] group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n) if not all_equal(): @@ -149,11 +151,12 @@ def filter_ignorable_lines(hunks, fromli yield hunk -NO_NEWLINE_AT_END = '\\ No newline at end of file' +NO_NEWLINE_AT_END = b'\\ No newline at end of file' +LINE_TERM = b'\n' -def 
unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0, - ignore_case=0, ignore_space_changes=0, lineterm='\n'): +def unified_diff(from_lines, to_lines, context=None, ignore_blank_lines: bool = False, + ignore_case: bool = False, ignore_space_changes: bool = False, lineterm=LINE_TERM) -> bytes: """ Generator producing lines corresponding to a textual diff. @@ -162,10 +165,16 @@ def unified_diff(fromlines, tolines, con # TODO: johbo: Check if this can be nicely integrated into the matching if ignore_space_changes: - fromlines = [l.strip() for l in fromlines] - tolines = [l.strip() for l in tolines] + from_lines = [l.strip() for l in from_lines] + to_lines = [l.strip() for l in to_lines] - for group in get_filtered_hunks(fromlines, tolines, context, + def _hunk_range(start, length) -> bytes: + if length != 1: + return b'%d,%d' % (start, length) + else: + return b'%d' % (start,) + + for group in get_filtered_hunks(from_lines, to_lines, context, ignore_blank_lines, ignore_case, ignore_space_changes): i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4] @@ -173,37 +182,30 @@ def unified_diff(fromlines, tolines, con i1, i2 = -1, -1 # support for Add changes if j1 == 0 and j2 == 0: j1, j2 = -1, -1 # support for Delete changes - yield '@@ -{} +{} @@{}'.format( + yield b'@@ -%b +%b @@%b' % ( _hunk_range(i1 + 1, i2 - i1), _hunk_range(j1 + 1, j2 - j1), lineterm) for tag, i1, i2, j1, j2 in group: if tag == 'equal': - for line in fromlines[i1:i2]: + for line in from_lines[i1:i2]: if not line.endswith(lineterm): - yield ' ' + line + lineterm + yield b' ' + line + lineterm yield NO_NEWLINE_AT_END + lineterm else: - yield ' ' + line + yield b' ' + line else: if tag in ('replace', 'delete'): - for line in fromlines[i1:i2]: + for line in from_lines[i1:i2]: if not line.endswith(lineterm): - yield '-' + line + lineterm + yield b'-' + line + lineterm yield NO_NEWLINE_AT_END + lineterm else: - yield '-' + line + yield b'-' + line if tag in ('replace', 
'insert'): - for line in tolines[j1:j2]: + for line in to_lines[j1:j2]: if not line.endswith(lineterm): - yield '+' + line + lineterm + yield b'+' + line + lineterm yield NO_NEWLINE_AT_END + lineterm else: - yield '+' + line - - -def _hunk_range(start, length): - if length != 1: - return '%d,%d' % (start, length) - else: - return '%d' % (start, ) + yield b'+' + line diff --git a/vcsserver/tests/test_svn.py b/vcsserver/tests/test_svn.py --- a/vcsserver/tests/test_svn.py +++ b/vcsserver/tests/test_svn.py @@ -69,11 +69,11 @@ def test_svn_libraries_can_be_imported() @pytest.mark.parametrize('example_url, parts', [ - ('http://server.com', (None, None, 'http://server.com')), - ('http://user@server.com', ('user', None, 'http://user@server.com')), + ('http://server.com', ('', '', 'http://server.com')), + ('http://user@server.com', ('user', '', 'http://user@server.com')), ('http://user:pass@server.com', ('user', 'pass', 'http://user:pass@server.com')), - ('