##// END OF EJS Templates
vcsserver: add debug information to some of the errors that might happen.
vcsserver: add debug information to some of the errors that might happen.

File last commit:

r182:5af7fc0d default
r217:a2c26a72 default
Show More
git.py
644 lines | 21.7 KiB | text/x-python | PythonLexer
initial commit
r0 # RhodeCode VCSServer provides access to different vcs backends via network.
license: updated copyright year to 2017
r149 # Copyright (C) 2014-2017 RodeCode GmbH
initial commit
r0 #
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
import logging
import os
import posixpath as vcspath
import re
import stat
import urllib
import urllib2
from functools import wraps
from dulwich import index, objects
from dulwich.client import HttpGitClient, LocalGitClient
from dulwich.errors import (
NotGitRepository, ChecksumMismatch, WrongObjectException,
MissingCommitError, ObjectMissing, HangupException,
UnexpectedCommandError)
from dulwich.repo import Repo as DulwichRepo, Tag
from dulwich.server import update_server_info
from vcsserver import exceptions, settings, subprocessio
from vcsserver.utils import safe_str
exception-handling: better handling of remote exception and logging....
r171 from vcsserver.base import RepoFactory, obfuscate_qs, raise_from_original
initial commit
r0 from vcsserver.hgcompat import (
remote-clones: make sure we always use obfuscated url inside logs....
r105 hg_url as url_parser, httpbasicauthhandler, httpdigestauthhandler)
largefiles: added handling of detection and fetching of largefiles....
r182 from vcsserver.git_lfs.lib import LFSOidStore
initial commit
r0
DIR_STAT = stat.S_IFDIR
FILE_MODE = stat.S_IFMT
GIT_LINK = objects.S_IFGITLINK
log = logging.getLogger(__name__)
def reraise_safe_exceptions(func):
"""Converts Dulwich exceptions to something neutral."""
@wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except (ChecksumMismatch, WrongObjectException, MissingCommitError,
ObjectMissing) as e:
raise exceptions.LookupException(e.message)
except (HangupException, UnexpectedCommandError) as e:
raise exceptions.VcsException(e.message)
exception-handling: better handling of remote exception and logging....
r171 except Exception as e:
exceptions: for dulwich we cannot catch all exceptions because it re-uses some...
r172 # NOTE(marcink): becuase of how dulwich handles some exceptions
# (KeyError on empty repos), we cannot track this and catch all
# exceptions, it's an exceptions from other handlers
#if not hasattr(e, '_vcs_kind'):
#log.exception("Unhandled exception in git remote call")
#raise_from_original(exceptions.UnhandledException)
exception-handling: better handling of remote exception and logging....
r171 raise
initial commit
r0 return wrapper
class Repo(DulwichRepo):
"""
A wrapper for dulwich Repo class.
Since dulwich is sometimes keeping .idx file descriptors open, it leads to
"Too many open files" error. We need to close all opened file descriptors
once the repo object is destroyed.
TODO: mikhail: please check if we need this wrapper after updating dulwich
to 0.12.0 +
"""
def __del__(self):
if hasattr(self, 'object_store'):
self.close()
class GitFactory(RepoFactory):
def _create_repo(self, wire, create):
repo_path = str_to_dulwich(wire['path'])
return Repo(repo_path)
class GitRemote(object):
def __init__(self, factory):
self._factory = factory
self._bulk_methods = {
"author": self.commit_attribute,
"date": self.get_object_attrs,
"message": self.commit_attribute,
"parents": self.commit_attribute,
"_commit": self.revision,
}
largefiles: added handling of detection and fetching of largefiles....
r182 def _wire_to_config(self, wire):
if 'config' in wire:
return dict([(x[0] + '_' + x[1], x[2]) for x in wire['config']])
return {}
initial commit
r0 def _assign_ref(self, wire, ref, commit_id):
repo = self._factory.repo(wire)
repo[ref] = commit_id
@reraise_safe_exceptions
def add_object(self, wire, content):
repo = self._factory.repo(wire)
blob = objects.Blob()
blob.set_raw_string(content)
repo.object_store.add_object(blob)
return blob.id
@reraise_safe_exceptions
def assert_correct_path(self, wire):
try:
self._factory.repo(wire)
except NotGitRepository as e:
# Exception can contain unicode which we convert
raise exceptions.AbortException(repr(e))
@reraise_safe_exceptions
def bare(self, wire):
repo = self._factory.repo(wire)
return repo.bare
@reraise_safe_exceptions
def blob_as_pretty_string(self, wire, sha):
repo = self._factory.repo(wire)
return repo[sha].as_pretty_string()
@reraise_safe_exceptions
def blob_raw_length(self, wire, sha):
repo = self._factory.repo(wire)
blob = repo[sha]
return blob.raw_length()
largefiles: added handling of detection and fetching of largefiles....
r182 def _parse_lfs_pointer(self, raw_content):
spec_string = 'version https://git-lfs.github.com/spec'
if raw_content and raw_content.startswith(spec_string):
pattern = re.compile(r"""
(?:\n)?
^version[ ]https://git-lfs\.github\.com/spec/(?P<spec_ver>v\d+)\n
^oid[ ] sha256:(?P<oid_hash>[0-9a-f]{64})\n
^size[ ](?P<oid_size>[0-9]+)\n
(?:\n)?
""", re.VERBOSE | re.MULTILINE)
match = pattern.match(raw_content)
if match:
return match.groupdict()
return {}
@reraise_safe_exceptions
def is_large_file(self, wire, sha):
repo = self._factory.repo(wire)
blob = repo[sha]
return self._parse_lfs_pointer(blob.as_raw_string())
@reraise_safe_exceptions
def in_largefiles_store(self, wire, oid):
repo = self._factory.repo(wire)
conf = self._wire_to_config(wire)
store_location = conf.get('vcs_git_lfs_store_location')
if store_location:
repo_name = repo.path
store = LFSOidStore(
oid=oid, repo=repo_name, store_location=store_location)
return store.has_oid()
return False
@reraise_safe_exceptions
def store_path(self, wire, oid):
repo = self._factory.repo(wire)
conf = self._wire_to_config(wire)
store_location = conf.get('vcs_git_lfs_store_location')
if store_location:
repo_name = repo.path
store = LFSOidStore(
oid=oid, repo=repo_name, store_location=store_location)
return store.oid_path
raise ValueError('Unable to fetch oid with path {}'.format(oid))
initial commit
r0 @reraise_safe_exceptions
def bulk_request(self, wire, rev, pre_load):
result = {}
for attr in pre_load:
try:
method = self._bulk_methods[attr]
args = [wire, rev]
if attr == "date":
args.extend(["commit_time", "commit_timezone"])
elif attr in ["author", "message", "parents"]:
args.append(attr)
result[attr] = method(*args)
except KeyError:
raise exceptions.VcsException(
"Unknown bulk attribute: %s" % attr)
return result
def _build_opener(self, url):
handlers = []
remote-clones: make sure we always use obfuscated url inside logs....
r105 url_obj = url_parser(url)
initial commit
r0 _, authinfo = url_obj.authinfo()
if authinfo:
# create a password manager
passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
passmgr.add_password(*authinfo)
handlers.extend((httpbasicauthhandler(passmgr),
httpdigestauthhandler(passmgr)))
return urllib2.build_opener(*handlers)
@reraise_safe_exceptions
def check_url(self, url, config):
remote-clones: make sure we always use obfuscated url inside logs....
r105 url_obj = url_parser(url)
initial commit
r0 test_uri, _ = url_obj.authinfo()
obfuscation: don't always set passwd to obfuscated text. In case there's...
r114 url_obj.passwd = '*****' if url_obj.passwd else url_obj.passwd
remote-clone: obfuscate also given query string paramas that RhodeCode uses. Fixes #4668
r106 url_obj.query = obfuscate_qs(url_obj.query)
initial commit
r0 cleaned_uri = str(url_obj)
remote-clones: make sure we always use obfuscated url inside logs....
r105 log.info("Checking URL for remote cloning/import: %s", cleaned_uri)
initial commit
r0
if not test_uri.endswith('info/refs'):
test_uri = test_uri.rstrip('/') + '/info/refs'
o = self._build_opener(url)
o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
q = {"service": 'git-upload-pack'}
qs = '?%s' % urllib.urlencode(q)
cu = "%s%s" % (test_uri, qs)
req = urllib2.Request(cu, None, {})
try:
remote-clones: make sure we always use obfuscated url inside logs....
r105 log.debug("Trying to open URL %s", cleaned_uri)
initial commit
r0 resp = o.open(req)
if resp.code != 200:
remote-clones: make sure we always use obfuscated url inside logs....
r105 raise exceptions.URLError('Return Code is not 200')
initial commit
r0 except Exception as e:
remote-clones: make sure we always use obfuscated url inside logs....
r105 log.warning("URL cannot be opened: %s", cleaned_uri, exc_info=True)
initial commit
r0 # means it cannot be cloned
remote-clones: make sure we always use obfuscated url inside logs....
r105 raise exceptions.URLError("[%s] org_exc: %s" % (cleaned_uri, e))
initial commit
r0
# now detect if it's proper git repo
gitdata = resp.read()
if 'service=git-upload-pack' in gitdata:
pass
elif re.findall(r'[0-9a-fA-F]{40}\s+refs', gitdata):
# old style git can return some other format !
pass
else:
remote-clones: make sure we always use obfuscated url inside logs....
r105 raise exceptions.URLError(
initial commit
r0 "url [%s] does not look like an git" % (cleaned_uri,))
return True
@reraise_safe_exceptions
def clone(self, wire, url, deferred, valid_refs, update_after_clone):
remote_refs = self.fetch(wire, url, apply_refs=False)
repo = self._factory.repo(wire)
if isinstance(valid_refs, list):
valid_refs = tuple(valid_refs)
for k in remote_refs:
# only parse heads/tags and skip so called deferred tags
if k.startswith(valid_refs) and not k.endswith(deferred):
repo[k] = remote_refs[k]
if update_after_clone:
# we want to checkout HEAD
repo["HEAD"] = remote_refs["HEAD"]
index.build_index_from_tree(repo.path, repo.index_path(),
repo.object_store, repo["HEAD"].tree)
# TODO: this is quite complex, check if that can be simplified
@reraise_safe_exceptions
def commit(self, wire, commit_data, branch, commit_tree, updated, removed):
repo = self._factory.repo(wire)
object_store = repo.object_store
# Create tree and populates it with blobs
commit_tree = commit_tree and repo[commit_tree] or objects.Tree()
for node in updated:
# Compute subdirs if needed
dirpath, nodename = vcspath.split(node['path'])
dirnames = map(safe_str, dirpath and dirpath.split('/') or [])
parent = commit_tree
ancestors = [('', parent)]
# Tries to dig for the deepest existing tree
while dirnames:
curdir = dirnames.pop(0)
try:
dir_id = parent[curdir][1]
except KeyError:
# put curdir back into dirnames and stops
dirnames.insert(0, curdir)
break
else:
# If found, updates parent
parent = repo[dir_id]
ancestors.append((curdir, parent))
# Now parent is deepest existing tree and we need to create
# subtrees for dirnames (in reverse order)
# [this only applies for nodes from added]
new_trees = []
blob = objects.Blob.from_string(node['content'])
if dirnames:
# If there are trees which should be created we need to build
# them now (in reverse order)
reversed_dirnames = list(reversed(dirnames))
curtree = objects.Tree()
curtree[node['node_path']] = node['mode'], blob.id
new_trees.append(curtree)
for dirname in reversed_dirnames[:-1]:
newtree = objects.Tree()
newtree[dirname] = (DIR_STAT, curtree.id)
new_trees.append(newtree)
curtree = newtree
parent[reversed_dirnames[-1]] = (DIR_STAT, curtree.id)
else:
parent.add(
name=node['node_path'], mode=node['mode'], hexsha=blob.id)
new_trees.append(parent)
# Update ancestors
reversed_ancestors = reversed(
[(a[1], b[1], b[0]) for a, b in zip(ancestors, ancestors[1:])])
for parent, tree, path in reversed_ancestors:
parent[path] = (DIR_STAT, tree.id)
object_store.add_object(tree)
object_store.add_object(blob)
for tree in new_trees:
object_store.add_object(tree)
for node_path in removed:
paths = node_path.split('/')
tree = commit_tree
trees = [tree]
# Traverse deep into the forest...
for path in paths:
try:
obj = repo[tree[path][1]]
if isinstance(obj, objects.Tree):
trees.append(obj)
tree = obj
except KeyError:
break
# Cut down the blob and all rotten trees on the way back...
for path, tree in reversed(zip(paths, trees)):
del tree[path]
if tree:
# This tree still has elements - don't remove it or any
# of it's parents
break
object_store.add_object(commit_tree)
# Create commit
commit = objects.Commit()
commit.tree = commit_tree.id
for k, v in commit_data.iteritems():
setattr(commit, k, v)
object_store.add_object(commit)
ref = 'refs/heads/%s' % branch
repo.refs[ref] = commit.id
return commit.id
@reraise_safe_exceptions
def fetch(self, wire, url, apply_refs=True, refs=None):
if url != 'default' and '://' not in url:
client = LocalGitClient(url)
else:
remote-clones: make sure we always use obfuscated url inside logs....
r105 url_obj = url_parser(url)
initial commit
r0 o = self._build_opener(url)
url, _ = url_obj.authinfo()
client = HttpGitClient(base_url=url, opener=o)
repo = self._factory.repo(wire)
determine_wants = repo.object_store.determine_wants_all
if refs:
def determine_wants_requested(references):
return [references[r] for r in references if r in refs]
determine_wants = determine_wants_requested
try:
remote_refs = client.fetch(
path=url, target=repo, determine_wants=determine_wants)
error-handlig: pass in Abort exception info to RhodeCode from vcsserver.
r120 except NotGitRepository as e:
initial commit
r0 log.warning(
'Trying to fetch from "%s" failed, not a Git repository.', url)
error-handlig: pass in Abort exception info to RhodeCode from vcsserver.
r120 # Exception can contain unicode which we convert
raise exceptions.AbortException(repr(e))
initial commit
r0
# mikhail: client.fetch() returns all the remote refs, but fetches only
# refs filtered by `determine_wants` function. We need to filter result
# as well
if refs:
remote_refs = {k: remote_refs[k] for k in remote_refs if k in refs}
if apply_refs:
# TODO: johbo: Needs proper test coverage with a git repository
# that contains a tag object, so that we would end up with
# a peeled ref at this point.
PEELED_REF_MARKER = '^{}'
for k in remote_refs:
if k.endswith(PEELED_REF_MARKER):
log.info("Skipping peeled reference %s", k)
continue
repo[k] = remote_refs[k]
if refs:
# mikhail: explicitly set the head to the last ref.
repo['HEAD'] = remote_refs[refs[-1]]
# TODO: mikhail: should we return remote_refs here to be
# consistent?
else:
return remote_refs
@reraise_safe_exceptions
def get_remote_refs(self, wire, url):
repo = Repo(url)
return repo.get_refs()
@reraise_safe_exceptions
def get_description(self, wire):
repo = self._factory.repo(wire)
return repo.get_description()
@reraise_safe_exceptions
def get_file_history(self, wire, file_path, commit_id, limit):
repo = self._factory.repo(wire)
include = [commit_id]
paths = [file_path]
walker = repo.get_walker(include, paths=paths, max_entries=limit)
return [x.commit.id for x in walker]
@reraise_safe_exceptions
def get_missing_revs(self, wire, rev1, rev2, path2):
repo = self._factory.repo(wire)
LocalGitClient(thin_packs=False).fetch(path2, repo)
wire_remote = wire.copy()
wire_remote['path'] = path2
repo_remote = self._factory.repo(wire_remote)
LocalGitClient(thin_packs=False).fetch(wire["path"], repo_remote)
revs = [
x.commit.id
for x in repo_remote.get_walker(include=[rev2], exclude=[rev1])]
return revs
@reraise_safe_exceptions
def get_object(self, wire, sha):
repo = self._factory.repo(wire)
obj = repo.get_object(sha)
commit_id = obj.id
if isinstance(obj, Tag):
commit_id = obj.object[1]
return {
'id': obj.id,
'type': obj.type_name,
'commit_id': commit_id
}
@reraise_safe_exceptions
def get_object_attrs(self, wire, sha, *attrs):
repo = self._factory.repo(wire)
obj = repo.get_object(sha)
return list(getattr(obj, a) for a in attrs)
@reraise_safe_exceptions
dan
git: use peeled commit hashes for get_refs so that annotated tags...
r65 def get_refs(self, wire):
initial commit
r0 repo = self._factory.repo(wire)
dan
git: use peeled commit hashes for get_refs so that annotated tags...
r65 result = {}
for ref, sha in repo.refs.as_dict().items():
dan
tags: use get_peeled to get the peeled tag sha commit instead of...
r76 peeled_sha = repo.get_peeled(ref)
dan
git: use peeled commit hashes for get_refs so that annotated tags...
r65 result[ref] = peeled_sha
return result
initial commit
r0
@reraise_safe_exceptions
def get_refs_path(self, wire):
repo = self._factory.repo(wire)
return repo.refs.path
@reraise_safe_exceptions
def head(self, wire):
repo = self._factory.repo(wire)
return repo.head()
@reraise_safe_exceptions
def init(self, wire):
repo_path = str_to_dulwich(wire['path'])
self.repo = Repo.init(repo_path)
@reraise_safe_exceptions
def init_bare(self, wire):
repo_path = str_to_dulwich(wire['path'])
self.repo = Repo.init_bare(repo_path)
@reraise_safe_exceptions
def revision(self, wire, rev):
repo = self._factory.repo(wire)
obj = repo[rev]
obj_data = {
'id': obj.id,
}
try:
obj_data['tree'] = obj.tree
except AttributeError:
pass
return obj_data
@reraise_safe_exceptions
def commit_attribute(self, wire, rev, attr):
repo = self._factory.repo(wire)
obj = repo[rev]
return getattr(obj, attr)
@reraise_safe_exceptions
def set_refs(self, wire, key, value):
repo = self._factory.repo(wire)
repo.refs[key] = value
@reraise_safe_exceptions
def remove_ref(self, wire, key):
repo = self._factory.repo(wire)
del repo.refs[key]
@reraise_safe_exceptions
def tree_changes(self, wire, source_id, target_id):
repo = self._factory.repo(wire)
source = repo[source_id].tree if source_id else None
target = repo[target_id].tree
result = repo.object_store.tree_changes(source, target)
return list(result)
@reraise_safe_exceptions
def tree_items(self, wire, tree_id):
repo = self._factory.repo(wire)
tree = repo[tree_id]
result = []
for item in tree.iteritems():
item_sha = item.sha
item_mode = item.mode
if FILE_MODE(item_mode) == GIT_LINK:
item_type = "link"
else:
item_type = repo[item_sha].type_name
result.append((item.path, item_mode, item_sha, item_type))
return result
@reraise_safe_exceptions
def update_server_info(self, wire):
repo = self._factory.repo(wire)
update_server_info(repo)
@reraise_safe_exceptions
def discover_git_version(self):
stdout, _ = self.run_git_command(
{}, ['--version'], _bare=True, _safe=True)
backends: implemented functions for fetching backend versions via remote calls....
r101 prefix = 'git version'
if stdout.startswith(prefix):
stdout = stdout[len(prefix):]
git-version: strip the info returned from subcall to remove newline character.
r103 return stdout.strip()
initial commit
r0
@reraise_safe_exceptions
def run_git_command(self, wire, cmd, **opts):
path = wire.get('path', None)
if path and os.path.isdir(path):
opts['cwd'] = path
if '_bare' in opts:
_copts = []
del opts['_bare']
else:
_copts = ['-c', 'core.quotepath=false', ]
safe_call = False
if '_safe' in opts:
# no exc on failure
del opts['_safe']
safe_call = True
gitenv = os.environ.copy()
gitenv.update(opts.pop('extra_env', {}))
# need to clean fix GIT_DIR !
if 'GIT_DIR' in gitenv:
del gitenv['GIT_DIR']
gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
cmd = [settings.GIT_EXECUTABLE] + _copts + cmd
try:
_opts = {'env': gitenv, 'shell': False}
_opts.update(opts)
p = subprocessio.SubprocessIOChunker(cmd, **_opts)
return ''.join(p), ''.join(p.error)
except (EnvironmentError, OSError) as err:
tb_err = ("Couldn't run git command (%s).\n"
"Original error was:%s\n" % (cmd, err))
log.exception(tb_err)
if safe_call:
return '', err
else:
raise exceptions.VcsException(tb_err)
def str_to_dulwich(value):
"""
Dulwich 0.10.1a requires `unicode` objects to be passed in.
"""
return value.decode(settings.WIRE_ENCODING)