# HG changeset patch # User Mads Kiilerich # Date 2015-06-09 20:53:24 # Node ID 0e2d450feb031df599543d62a0245c453e58c3e0 # Parent e3840a1ec58cc1462c2b33667c76cb16fb2615d3 git: run external commands as list of strings so we really get correct quoting (Issue #135) a6dfd14d4b20 from https://bitbucket.org/conservancy/kallithea/pull-request/17/add-quotes-to-repo-urls-for-git-backend fixed that issue but did not make it "safe". The vcs git backend still used command strings but tried to quote them correctly ... but that approach is almost impossible to get right. Instead, pass a string list all the way to the subprocess module and let it do the quoting. This also makes some of the code more simple. diff --git a/kallithea/controllers/compare.py b/kallithea/controllers/compare.py --- a/kallithea/controllers/compare.py +++ b/kallithea/controllers/compare.py @@ -130,13 +130,13 @@ class CompareController(BaseRepoControll else: so, se = org_repo.run_git_command( - 'log --reverse --pretty="format: %%H" -s %s..%s' - % (org_rev, other_rev) + ['log', '--reverse', '--pretty=format:%H', + '-s', '%s..%s' % (org_rev, other_rev)] ) other_changesets = [org_repo.get_changeset(cs) for cs in re.findall(r'[0-9a-fA-F]{40}', so)] so, se = org_repo.run_git_command( - 'merge-base %s %s' % (org_rev, other_rev) + ['merge-base', org_rev, other_rev] ) ancestor = re.findall(r'[0-9a-fA-F]{40}', so)[0] org_changesets = [] diff --git a/kallithea/lib/hooks.py b/kallithea/lib/hooks.py --- a/kallithea/lib/hooks.py +++ b/kallithea/lib/hooks.py @@ -447,21 +447,21 @@ def handle_git_receive(repo_path, revs, repo._repo.refs.set_symbolic_ref('HEAD', 'refs/heads/%s' % push_ref['name']) - cmd = "for-each-ref --format='%(refname)' 'refs/heads/*'" + cmd = ['for-each-ref', '--format=%(refname)','refs/heads/*'] heads = repo.run_git_command(cmd)[0] + cmd = ['log', push_ref['new_rev'], + '--reverse', '--pretty=format:%H', '--not'] heads = heads.replace(push_ref['ref'], '') - heads = ' '.join(map(lambda c: c.strip('\n').strip(), - heads.splitlines())) - cmd = (('log %(new_rev)s' % push_ref) + - ' --reverse --pretty=format:"%H" --not ' + heads) + for l in heads.splitlines(): + cmd.append(l.strip()) git_revs += repo.run_git_command(cmd)[0].splitlines() elif push_ref['new_rev'] == EmptyChangeset().raw_id: #delete branch case git_revs += ['delete_branch=>%s' % push_ref['name']] else: - cmd = (('log %(old_rev)s..%(new_rev)s' % push_ref) + - ' --reverse --pretty=format:"%H"') + cmd = ['log', '%(old_rev)s..%(new_rev)s' % push_ref, + '--reverse', '--pretty=format:%H'] git_revs += repo.run_git_command(cmd)[0].splitlines() elif _type == 'tags': diff --git a/kallithea/lib/middleware/pygrack.py b/kallithea/lib/middleware/pygrack.py --- a/kallithea/lib/middleware/pygrack.py +++ b/kallithea/lib/middleware/pygrack.py @@ -82,13 +82,12 @@ class GitRepository(object): server_advert = '# service=%s' % git_command packet_len = str(hex(len(server_advert) + 4)[2:].rjust(4, '0')).lower() _git_path = kallithea.CONFIG.get('git_path', 'git') + cmd = [_git_path, git_command[4:], + '--stateless-rpc', '--advertise-refs', self.content_path] + log.debug('handling cmd %s', cmd) try: - out = subprocessio.SubprocessIOChunker( - r'%s %s --stateless-rpc --advertise-refs "%s"' % ( - _git_path, git_command[4:], self.content_path), - starting_values=[ - packet_len + server_advert + '0000' - ] + out = subprocessio.SubprocessIOChunker(cmd, + starting_values=[packet_len + server_advert + '0000'] ) except EnvironmentError, e: log.error(traceback.format_exc()) @@ -118,21 +117,17 @@ class GitRepository(object): else: inputstream = environ['wsgi.input'] + gitenv = dict(os.environ) + # forget all configs + gitenv['GIT_CONFIG_NOGLOBAL'] = '1' + cmd = [_git_path, git_command[4:], '--stateless-rpc', self.content_path] + log.debug('handling cmd %s', cmd) try: - gitenv = os.environ - # forget all configs - gitenv['GIT_CONFIG_NOGLOBAL'] = '1' - opts = dict( - env=gitenv, - cwd=self.content_path, - ) - cmd = r'%s %s --stateless-rpc "%s"' % (_git_path, git_command[4:], - self.content_path), - log.debug('handling cmd %s' % cmd) out = subprocessio.SubprocessIOChunker( cmd, inputstream=inputstream, - **opts + env=gitenv, + cwd=self.content_path, ) except EnvironmentError, e: log.error(traceback.format_exc()) diff --git a/kallithea/lib/utils.py b/kallithea/lib/utils.py --- a/kallithea/lib/utils.py +++ b/kallithea/lib/utils.py @@ -799,7 +799,7 @@ def check_git_version(): if 'git' not in BACKENDS: return None - stdout, stderr = GitRepository._run_git_command('--version', _bare=True, + stdout, stderr = GitRepository._run_git_command(['--version'], _bare=True, _safe=True) m = re.search("\d+.\d+.\d+", stdout) diff --git a/kallithea/lib/vcs/backends/git/changeset.py b/kallithea/lib/vcs/backends/git/changeset.py --- a/kallithea/lib/vcs/backends/git/changeset.py +++ b/kallithea/lib/vcs/backends/git/changeset.py @@ -189,7 +189,7 @@ class GitChangeset(BaseChangeset): """ rev_filter = settings.GIT_REV_FILTER so, se = self.repository.run_git_command( - "rev-list %s --children" % (rev_filter) + ['rev-list', rev_filter, '--children'] ) children = [] @@ -288,12 +288,12 @@ class GitChangeset(BaseChangeset): f_path = safe_str(path) if limit: - cmd = 'log -n %s --pretty="format: %%H" -s %s -- "%s"' % ( - safe_int(limit, 0), cs_id, f_path) + cmd = ['log', '-n', str(safe_int(limit, 0)), + '--pretty=format:%H', '-s', cs_id, '--', f_path] else: - cmd = 'log --pretty="format: %%H" -s %s -- "%s"' % ( - cs_id, f_path) + cmd = ['log', + '--pretty=format:%H', '-s', cs_id, '--', f_path] so, se = self.repository.run_git_command(cmd) ids = re.findall(r'[0-9a-fA-F]{40}', so) return [self.repository.get_changeset(sha) for sha in ids] @@ -321,7 +321,7 @@ class GitChangeset(BaseChangeset): generally not good. Should be replaced with algorithm iterating commits. """ - cmd = 'blame -l --root -r %s -- "%s"' % (self.id, path) + cmd = ['blame', '-l', '--root', '-r', self.id, '--', path] # -l ==> outputs long shas (and we need all 40 characters) # --root ==> doesn't put '^' character for boundaries # -r sha ==> blames for the given revision @@ -471,8 +471,8 @@ class GitChangeset(BaseChangeset): def _diff_name_status(self): output = [] for parent in self.parents: - cmd = 'diff --name-status %s %s --encoding=utf8' % (parent.raw_id, - self.raw_id) + cmd = ['diff', '--name-status', parent.raw_id, self.raw_id, + '--encoding=utf8'] so, se = self.repository.run_git_command(cmd) output.append(so.strip()) return '\n'.join(output) diff --git a/kallithea/lib/vcs/backends/git/repository.py b/kallithea/lib/vcs/backends/git/repository.py --- a/kallithea/lib/vcs/backends/git/repository.py +++ b/kallithea/lib/vcs/backends/git/repository.py @@ -18,18 +18,6 @@ import urllib2 import logging import posixpath import string -import sys -if sys.platform == "win32": - from subprocess import list2cmdline - def quote(s): - return list2cmdline([s]) -else: - try: - # Python <=2.7 - from pipes import quote - except ImportError: - # Python 3.3+ - from shlex import quote from dulwich.objects import Tag from dulwich.repo import Repo, NotGitRepository @@ -135,10 +123,7 @@ class GitRepository(BaseRepository): del opts['_safe'] safe_call = True - _str_cmd = False - if isinstance(cmd, basestring): - cmd = [cmd] - _str_cmd = True + assert isinstance(cmd, list), cmd gitenv = os.environ # need to clean fix GIT_DIR ! @@ -148,13 +133,11 @@ class GitRepository(BaseRepository): _git_path = settings.GIT_EXECUTABLE_PATH cmd = [_git_path] + _copts + cmd - if _str_cmd: - cmd = ' '.join(cmd) try: _opts = dict( env=gitenv, - shell=True, + shell=False, ) _opts.update(opts) p = subprocessio.SubprocessIOChunker(cmd, **_opts) @@ -268,7 +251,7 @@ class GitRepository(BaseRepository): return [] rev_filter = settings.GIT_REV_FILTER - cmd = 'rev-list %s --reverse --date-order' % (rev_filter) + cmd = ['rev-list', rev_filter, '--reverse', '--date-order'] try: so, se = self.run_git_command(cmd) except RepositoryError: @@ -551,22 +534,16 @@ class GitRepository(BaseRepository): # %H at format means (full) commit hash, initial hashes are retrieved # in ascending date order - cmd_template = 'log --date-order --reverse --pretty=format:"%H"' - cmd_params = {} + cmd = ['log', '--date-order', '--reverse', '--pretty=format:%H'] if start_date: - cmd_template += ' --since "$since"' - cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S') + cmd += ['--since', start_date.strftime('%m/%d/%y %H:%M:%S')] if end_date: - cmd_template += ' --until "$until"' - cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S') + cmd += ['--until', end_date.strftime('%m/%d/%y %H:%M:%S')] if branch_name: - cmd_template += ' $branch_name' - cmd_params['branch_name'] = branch_name + cmd.append(branch_name) else: - rev_filter = settings.GIT_REV_FILTER - cmd_template += ' %s' % (rev_filter) + cmd.append(settings.GIT_REV_FILTER) - cmd = string.Template(cmd_template).safe_substitute(**cmd_params) revs = self.run_git_command(cmd)[0].splitlines() start_pos = 0 end_pos = len(revs) @@ -622,14 +599,14 @@ class GitRepository(BaseRepository): if rev1 == self.EMPTY_CHANGESET: rev2 = self.get_changeset(rev2).raw_id - cmd = ' '.join(['show'] + flags + [rev2]) + cmd = ['show'] + flags + [rev2] else: rev1 = self.get_changeset(rev1).raw_id rev2 = self.get_changeset(rev2).raw_id - cmd = ' '.join(['diff'] + flags + [rev1, rev2]) + cmd = ['diff'] + flags + [rev1, rev2] if path: - cmd += ' -- "%s"' % path + cmd += ['--', path] stdout, stderr = self.run_git_command(cmd) # TODO: don't ignore stderr @@ -663,8 +640,7 @@ class GitRepository(BaseRepository): cmd.append('--bare') elif not update_after_clone: cmd.append('--no-checkout') - cmd += ['--', quote(url), quote(self.path)] - cmd = ' '.join(cmd) + cmd += ['--', url, self.path] # If error occurs run_git_command raises RepositoryError already self.run_git_command(cmd) @@ -673,8 +649,7 @@ class GitRepository(BaseRepository): Tries to pull changes from external location. """ url = self._get_url(url) - cmd = ['pull', "--ff-only", quote(url)] - cmd = ' '.join(cmd) + cmd = ['pull', '--ff-only', url] # If error occurs run_git_command raises RepositoryError already self.run_git_command(cmd) @@ -683,13 +658,11 @@ class GitRepository(BaseRepository): Tries to pull changes from external location. """ url = self._get_url(url) - so, se = self.run_git_command('ls-remote -h %s' % quote(url)) - refs = [] + so, se = self.run_git_command(['ls-remote', '-h', url]) + cmd = ['fetch', url, '--'] for line in (x for x in so.splitlines()): sha, ref = line.split('\t') - refs.append(ref) - refs = ' '.join(('+%s:%s' % (r, r) for r in refs)) - cmd = '''fetch %s -- %s''' % (quote(url), refs) + cmd.append('+%s:%s' % (ref, ref)) self.run_git_command(cmd) def _update_server_info(self): diff --git a/kallithea/lib/vcs/subprocessio.py b/kallithea/lib/vcs/subprocessio.py --- a/kallithea/lib/vcs/subprocessio.py +++ b/kallithea/lib/vcs/subprocessio.py @@ -342,11 +342,9 @@ class SubprocessIOChunker(object): input_streamer.start() inputstream = input_streamer.output - _shell = kwargs.get('shell', True) - if isinstance(cmd, (list, tuple)): - cmd = ' '.join(cmd) + # Note: fragile cmd mangling has been removed for use in Kallithea + assert isinstance(cmd, list), cmd - kwargs['shell'] = _shell _p = subprocess.Popen(cmd, bufsize=-1, stdin=inputstream, stdout=subprocess.PIPE, diff --git a/kallithea/tests/vcs/test_git.py b/kallithea/tests/vcs/test_git.py --- a/kallithea/tests/vcs/test_git.py +++ b/kallithea/tests/vcs/test_git.py @@ -682,34 +682,34 @@ class GitSpecificWithRepoTest(_BackendTe 'base') def test_workdir_get_branch(self): - self.repo.run_git_command('checkout -b production') + self.repo.run_git_command(['checkout', '-b', 'production']) # Regression test: one of following would fail if we don't check # .git/HEAD file - self.repo.run_git_command('checkout production') + self.repo.run_git_command(['checkout', 'production']) self.assertEqual(self.repo.workdir.get_branch(), 'production') - self.repo.run_git_command('checkout master') + self.repo.run_git_command(['checkout', 'master']) self.assertEqual(self.repo.workdir.get_branch(), 'master') def test_get_diff_runs_git_command_with_hashes(self): self.repo.run_git_command = mock.Mock(return_value=['', '']) self.repo.get_diff(0, 1) self.repo.run_git_command.assert_called_once_with( - 'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s' % - (3, self.repo._get_revision(0), self.repo._get_revision(1))) + ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40', + self.repo._get_revision(0), self.repo._get_revision(1)]) def test_get_diff_runs_git_command_with_str_hashes(self): self.repo.run_git_command = mock.Mock(return_value=['', '']) self.repo.get_diff(self.repo.EMPTY_CHANGESET, 1) self.repo.run_git_command.assert_called_once_with( - 'show -U%s --full-index --binary -p -M --abbrev=40 %s' % - (3, self.repo._get_revision(1))) + ['show', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40', + self.repo._get_revision(1)]) def test_get_diff_runs_git_command_with_path_if_its_given(self): self.repo.run_git_command = mock.Mock(return_value=['', '']) self.repo.get_diff(0, 1, 'foo') self.repo.run_git_command.assert_called_once_with( - 'diff -U%s --full-index --binary -p -M --abbrev=40 %s %s -- "foo"' - % (3, self.repo._get_revision(0), self.repo._get_revision(1))) + ['diff', '-U3', '--full-index', '--binary', '-p', '-M', '--abbrev=40', + self.repo._get_revision(0), self.repo._get_revision(1), '--', 'foo']) class GitRegressionTest(_BackendTestMixin, unittest.TestCase):