##// END OF EJS Templates
fixed handling shell argument in subprocess calls, it always was hardcoded even when passed properly in arguments
marcink -
r3830:08d439bf beta
parent child Browse files
Show More
@@ -1,694 +1,695 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 vcs.backends.git.repository
3 vcs.backends.git.repository
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
5
5
6 Git repository implementation.
6 Git repository implementation.
7
7
8 :created_on: Apr 8, 2010
8 :created_on: Apr 8, 2010
9 :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
9 :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
10 """
10 """
11
11
12 import os
12 import os
13 import re
13 import re
14 import time
14 import time
15 import urllib
15 import urllib
16 import urllib2
16 import urllib2
17 import logging
17 import logging
18 import posixpath
18 import posixpath
19 import string
19 import string
20
20
21 from dulwich.objects import Tag
21 from dulwich.objects import Tag
22 from dulwich.repo import Repo, NotGitRepository
22 from dulwich.repo import Repo, NotGitRepository
23
23
24 from rhodecode.lib.vcs import subprocessio
24 from rhodecode.lib.vcs import subprocessio
25 from rhodecode.lib.vcs.backends.base import BaseRepository, CollectionGenerator
25 from rhodecode.lib.vcs.backends.base import BaseRepository, CollectionGenerator
26 from rhodecode.lib.vcs.conf import settings
26 from rhodecode.lib.vcs.conf import settings
27
27
28 from rhodecode.lib.vcs.exceptions import (
28 from rhodecode.lib.vcs.exceptions import (
29 BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError,
29 BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError,
30 RepositoryError, TagAlreadyExistError, TagDoesNotExistError
30 RepositoryError, TagAlreadyExistError, TagDoesNotExistError
31 )
31 )
32 from rhodecode.lib.vcs.utils import safe_unicode, makedate, date_fromtimestamp
32 from rhodecode.lib.vcs.utils import safe_unicode, makedate, date_fromtimestamp
33 from rhodecode.lib.vcs.utils.lazy import LazyProperty
33 from rhodecode.lib.vcs.utils.lazy import LazyProperty
34 from rhodecode.lib.vcs.utils.ordered_dict import OrderedDict
34 from rhodecode.lib.vcs.utils.ordered_dict import OrderedDict
35 from rhodecode.lib.vcs.utils.paths import abspath, get_user_home
35 from rhodecode.lib.vcs.utils.paths import abspath, get_user_home
36
36
37 from rhodecode.lib.vcs.utils.hgcompat import (
37 from rhodecode.lib.vcs.utils.hgcompat import (
38 hg_url, httpbasicauthhandler, httpdigestauthhandler
38 hg_url, httpbasicauthhandler, httpdigestauthhandler
39 )
39 )
40
40
41 from .changeset import GitChangeset
41 from .changeset import GitChangeset
42 from .config import ConfigFile
42 from .config import ConfigFile
43 from .inmemory import GitInMemoryChangeset
43 from .inmemory import GitInMemoryChangeset
44 from .workdir import GitWorkdir
44 from .workdir import GitWorkdir
45
45
46 SHA_PATTERN = re.compile(r'^[[0-9a-fA-F]{12}|[0-9a-fA-F]{40}]$')
46 SHA_PATTERN = re.compile(r'^[[0-9a-fA-F]{12}|[0-9a-fA-F]{40}]$')
47
47
48 log = logging.getLogger(__name__)
48 log = logging.getLogger(__name__)
49
49
50
50
51 class GitRepository(BaseRepository):
51 class GitRepository(BaseRepository):
52 """
52 """
53 Git repository backend.
53 Git repository backend.
54 """
54 """
55 DEFAULT_BRANCH_NAME = 'master'
55 DEFAULT_BRANCH_NAME = 'master'
56 scm = 'git'
56 scm = 'git'
57
57
58 def __init__(self, repo_path, create=False, src_url=None,
58 def __init__(self, repo_path, create=False, src_url=None,
59 update_after_clone=False, bare=False):
59 update_after_clone=False, bare=False):
60
60
61 self.path = abspath(repo_path)
61 self.path = abspath(repo_path)
62 repo = self._get_repo(create, src_url, update_after_clone, bare)
62 repo = self._get_repo(create, src_url, update_after_clone, bare)
63 self.bare = repo.bare
63 self.bare = repo.bare
64
64
65 @property
65 @property
66 def _config_files(self):
66 def _config_files(self):
67 return [
67 return [
68 self.bare and abspath(self.path, 'config')
68 self.bare and abspath(self.path, 'config')
69 or abspath(self.path, '.git', 'config'),
69 or abspath(self.path, '.git', 'config'),
70 abspath(get_user_home(), '.gitconfig'),
70 abspath(get_user_home(), '.gitconfig'),
71 ]
71 ]
72
72
73 @property
73 @property
74 def _repo(self):
74 def _repo(self):
75 return Repo(self.path)
75 return Repo(self.path)
76
76
77 @property
77 @property
78 def head(self):
78 def head(self):
79 try:
79 try:
80 return self._repo.head()
80 return self._repo.head()
81 except KeyError:
81 except KeyError:
82 return None
82 return None
83
83
84 @LazyProperty
84 @LazyProperty
85 def revisions(self):
85 def revisions(self):
86 """
86 """
87 Returns list of revisions' ids, in ascending order. Being lazy
87 Returns list of revisions' ids, in ascending order. Being lazy
88 attribute allows external tools to inject shas from cache.
88 attribute allows external tools to inject shas from cache.
89 """
89 """
90 return self._get_all_revisions()
90 return self._get_all_revisions()
91
91
92 @classmethod
92 @classmethod
93 def _run_git_command(cls, cmd, **opts):
93 def _run_git_command(cls, cmd, **opts):
94 """
94 """
95 Runs given ``cmd`` as git command and returns tuple
95 Runs given ``cmd`` as git command and returns tuple
96 (stdout, stderr).
96 (stdout, stderr).
97
97
98 :param cmd: git command to be executed
98 :param cmd: git command to be executed
99 :param opts: env options to pass into Subprocess command
99 :param opts: env options to pass into Subprocess command
100 """
100 """
101
101
102 if '_bare' in opts:
102 if '_bare' in opts:
103 _copts = []
103 _copts = []
104 del opts['_bare']
104 del opts['_bare']
105 else:
105 else:
106 _copts = ['-c', 'core.quotepath=false', ]
106 _copts = ['-c', 'core.quotepath=false', ]
107 safe_call = False
107 safe_call = False
108 if '_safe' in opts:
108 if '_safe' in opts:
109 #no exc on failure
109 #no exc on failure
110 del opts['_safe']
110 del opts['_safe']
111 safe_call = True
111 safe_call = True
112
112
113 _str_cmd = False
113 _str_cmd = False
114 if isinstance(cmd, basestring):
114 if isinstance(cmd, basestring):
115 cmd = [cmd]
115 cmd = [cmd]
116 _str_cmd = True
116 _str_cmd = True
117
117
118 gitenv = os.environ
118 gitenv = os.environ
119 # need to clean fix GIT_DIR !
119 # need to clean fix GIT_DIR !
120 if 'GIT_DIR' in gitenv:
120 if 'GIT_DIR' in gitenv:
121 del gitenv['GIT_DIR']
121 del gitenv['GIT_DIR']
122 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
122 gitenv['GIT_CONFIG_NOGLOBAL'] = '1'
123
123
124 _git_path = settings.GIT_EXECUTABLE_PATH
124 _git_path = settings.GIT_EXECUTABLE_PATH
125 cmd = [_git_path] + _copts + cmd
125 cmd = [_git_path] + _copts + cmd
126 if _str_cmd:
126 if _str_cmd:
127 cmd = ' '.join(cmd)
127 cmd = ' '.join(cmd)
128
128 try:
129 try:
129 _opts = dict(
130 _opts = dict(
130 env=gitenv,
131 env=gitenv,
131 shell=False,
132 shell=True,
132 )
133 )
133 _opts.update(opts)
134 _opts.update(opts)
134 p = subprocessio.SubprocessIOChunker(cmd, **_opts)
135 p = subprocessio.SubprocessIOChunker(cmd, **_opts)
135 except (EnvironmentError, OSError), err:
136 except (EnvironmentError, OSError), err:
136 tb_err = ("Couldn't run git command (%s).\n"
137 tb_err = ("Couldn't run git command (%s).\n"
137 "Original error was:%s\n" % (cmd, err))
138 "Original error was:%s\n" % (cmd, err))
138 log.error(tb_err)
139 log.error(tb_err)
139 if safe_call:
140 if safe_call:
140 return '', err
141 return '', err
141 else:
142 else:
142 raise RepositoryError(tb_err)
143 raise RepositoryError(tb_err)
143
144
144 return ''.join(p.output), ''.join(p.error)
145 return ''.join(p.output), ''.join(p.error)
145
146
146 def run_git_command(self, cmd):
147 def run_git_command(self, cmd):
147 opts = {}
148 opts = {}
148 if os.path.isdir(self.path):
149 if os.path.isdir(self.path):
149 opts['cwd'] = self.path
150 opts['cwd'] = self.path
150 return self._run_git_command(cmd, **opts)
151 return self._run_git_command(cmd, **opts)
151
152
152 @classmethod
153 @classmethod
153 def _check_url(cls, url):
154 def _check_url(cls, url):
154 """
155 """
155 Functon will check given url and try to verify if it's a valid
156 Functon will check given url and try to verify if it's a valid
156 link. Sometimes it may happened that mercurial will issue basic
157 link. Sometimes it may happened that mercurial will issue basic
157 auth request that can cause whole API to hang when used from python
158 auth request that can cause whole API to hang when used from python
158 or other external calls.
159 or other external calls.
159
160
160 On failures it'll raise urllib2.HTTPError
161 On failures it'll raise urllib2.HTTPError
161 """
162 """
162
163
163 # check first if it's not an local url
164 # check first if it's not an local url
164 if os.path.isdir(url) or url.startswith('file:'):
165 if os.path.isdir(url) or url.startswith('file:'):
165 return True
166 return True
166
167
167 if('+' in url[:url.find('://')]):
168 if('+' in url[:url.find('://')]):
168 url = url[url.find('+') + 1:]
169 url = url[url.find('+') + 1:]
169
170
170 handlers = []
171 handlers = []
171 test_uri, authinfo = hg_url(url).authinfo()
172 test_uri, authinfo = hg_url(url).authinfo()
172 if not test_uri.endswith('info/refs'):
173 if not test_uri.endswith('info/refs'):
173 test_uri = test_uri.rstrip('/') + '/info/refs'
174 test_uri = test_uri.rstrip('/') + '/info/refs'
174 if authinfo:
175 if authinfo:
175 #create a password manager
176 #create a password manager
176 passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
177 passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
177 passmgr.add_password(*authinfo)
178 passmgr.add_password(*authinfo)
178
179
179 handlers.extend((httpbasicauthhandler(passmgr),
180 handlers.extend((httpbasicauthhandler(passmgr),
180 httpdigestauthhandler(passmgr)))
181 httpdigestauthhandler(passmgr)))
181
182
182 o = urllib2.build_opener(*handlers)
183 o = urllib2.build_opener(*handlers)
183 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
184 o.addheaders = [('User-Agent', 'git/1.7.8.0')] # fake some git
184
185
185 q = {"service": 'git-upload-pack'}
186 q = {"service": 'git-upload-pack'}
186 qs = '?%s' % urllib.urlencode(q)
187 qs = '?%s' % urllib.urlencode(q)
187 cu = "%s%s" % (test_uri, qs)
188 cu = "%s%s" % (test_uri, qs)
188 req = urllib2.Request(cu, None, {})
189 req = urllib2.Request(cu, None, {})
189
190
190 try:
191 try:
191 resp = o.open(req)
192 resp = o.open(req)
192 return resp.code == 200
193 return resp.code == 200
193 except Exception, e:
194 except Exception, e:
194 # means it cannot be cloned
195 # means it cannot be cloned
195 raise urllib2.URLError("[%s] %s" % (url, e))
196 raise urllib2.URLError("[%s] %s" % (url, e))
196
197
197 def _get_repo(self, create, src_url=None, update_after_clone=False,
198 def _get_repo(self, create, src_url=None, update_after_clone=False,
198 bare=False):
199 bare=False):
199 if create and os.path.exists(self.path):
200 if create and os.path.exists(self.path):
200 raise RepositoryError("Location already exist")
201 raise RepositoryError("Location already exist")
201 if src_url and not create:
202 if src_url and not create:
202 raise RepositoryError("Create should be set to True if src_url is "
203 raise RepositoryError("Create should be set to True if src_url is "
203 "given (clone operation creates repository)")
204 "given (clone operation creates repository)")
204 try:
205 try:
205 if create and src_url:
206 if create and src_url:
206 GitRepository._check_url(src_url)
207 GitRepository._check_url(src_url)
207 self.clone(src_url, update_after_clone, bare)
208 self.clone(src_url, update_after_clone, bare)
208 return Repo(self.path)
209 return Repo(self.path)
209 elif create:
210 elif create:
210 os.mkdir(self.path)
211 os.mkdir(self.path)
211 if bare:
212 if bare:
212 return Repo.init_bare(self.path)
213 return Repo.init_bare(self.path)
213 else:
214 else:
214 return Repo.init(self.path)
215 return Repo.init(self.path)
215 else:
216 else:
216 return self._repo
217 return self._repo
217 except (NotGitRepository, OSError), err:
218 except (NotGitRepository, OSError), err:
218 raise RepositoryError(err)
219 raise RepositoryError(err)
219
220
220 def _get_all_revisions(self):
221 def _get_all_revisions(self):
221 # we must check if this repo is not empty, since later command
222 # we must check if this repo is not empty, since later command
222 # fails if it is. And it's cheaper to ask than throw the subprocess
223 # fails if it is. And it's cheaper to ask than throw the subprocess
223 # errors
224 # errors
224 try:
225 try:
225 self._repo.head()
226 self._repo.head()
226 except KeyError:
227 except KeyError:
227 return []
228 return []
228
229
229 rev_filter = _git_path = settings.GIT_REV_FILTER
230 rev_filter = _git_path = settings.GIT_REV_FILTER
230 cmd = 'rev-list %s --reverse --date-order' % (rev_filter)
231 cmd = 'rev-list %s --reverse --date-order' % (rev_filter)
231 try:
232 try:
232 so, se = self.run_git_command(cmd)
233 so, se = self.run_git_command(cmd)
233 except RepositoryError:
234 except RepositoryError:
234 # Can be raised for empty repositories
235 # Can be raised for empty repositories
235 return []
236 return []
236 return so.splitlines()
237 return so.splitlines()
237
238
238 def _get_all_revisions2(self):
239 def _get_all_revisions2(self):
239 #alternate implementation using dulwich
240 #alternate implementation using dulwich
240 includes = [x[1][0] for x in self._parsed_refs.iteritems()
241 includes = [x[1][0] for x in self._parsed_refs.iteritems()
241 if x[1][1] != 'T']
242 if x[1][1] != 'T']
242 return [c.commit.id for c in self._repo.get_walker(include=includes)]
243 return [c.commit.id for c in self._repo.get_walker(include=includes)]
243
244
244 def _get_revision(self, revision):
245 def _get_revision(self, revision):
245 """
246 """
246 For git backend we always return integer here. This way we ensure
247 For git backend we always return integer here. This way we ensure
247 that changset's revision attribute would become integer.
248 that changset's revision attribute would become integer.
248 """
249 """
249
250
250 is_null = lambda o: len(o) == revision.count('0')
251 is_null = lambda o: len(o) == revision.count('0')
251
252
252 try:
253 try:
253 self.revisions[0]
254 self.revisions[0]
254 except (KeyError, IndexError):
255 except (KeyError, IndexError):
255 raise EmptyRepositoryError("There are no changesets yet")
256 raise EmptyRepositoryError("There are no changesets yet")
256
257
257 if revision in (None, '', 'tip', 'HEAD', 'head', -1):
258 if revision in (None, '', 'tip', 'HEAD', 'head', -1):
258 return self.revisions[-1]
259 return self.revisions[-1]
259
260
260 is_bstr = isinstance(revision, (str, unicode))
261 is_bstr = isinstance(revision, (str, unicode))
261 if ((is_bstr and revision.isdigit() and len(revision) < 12)
262 if ((is_bstr and revision.isdigit() and len(revision) < 12)
262 or isinstance(revision, int) or is_null(revision)):
263 or isinstance(revision, int) or is_null(revision)):
263 try:
264 try:
264 revision = self.revisions[int(revision)]
265 revision = self.revisions[int(revision)]
265 except Exception:
266 except Exception:
266 raise ChangesetDoesNotExistError("Revision %s does not exist "
267 raise ChangesetDoesNotExistError("Revision %s does not exist "
267 "for this repository" % (revision))
268 "for this repository" % (revision))
268
269
269 elif is_bstr:
270 elif is_bstr:
270 # get by branch/tag name
271 # get by branch/tag name
271 _ref_revision = self._parsed_refs.get(revision)
272 _ref_revision = self._parsed_refs.get(revision)
272 if _ref_revision: # and _ref_revision[1] in ['H', 'RH', 'T']:
273 if _ref_revision: # and _ref_revision[1] in ['H', 'RH', 'T']:
273 return _ref_revision[0]
274 return _ref_revision[0]
274
275
275 _tags_shas = self.tags.values()
276 _tags_shas = self.tags.values()
276 # maybe it's a tag ? we don't have them in self.revisions
277 # maybe it's a tag ? we don't have them in self.revisions
277 if revision in _tags_shas:
278 if revision in _tags_shas:
278 return _tags_shas[_tags_shas.index(revision)]
279 return _tags_shas[_tags_shas.index(revision)]
279
280
280 elif not SHA_PATTERN.match(revision) or revision not in self.revisions:
281 elif not SHA_PATTERN.match(revision) or revision not in self.revisions:
281 raise ChangesetDoesNotExistError("Revision %s does not exist "
282 raise ChangesetDoesNotExistError("Revision %s does not exist "
282 "for this repository" % (revision))
283 "for this repository" % (revision))
283
284
284 # Ensure we return full id
285 # Ensure we return full id
285 if not SHA_PATTERN.match(str(revision)):
286 if not SHA_PATTERN.match(str(revision)):
286 raise ChangesetDoesNotExistError("Given revision %s not recognized"
287 raise ChangesetDoesNotExistError("Given revision %s not recognized"
287 % revision)
288 % revision)
288 return revision
289 return revision
289
290
290 def _get_archives(self, archive_name='tip'):
291 def _get_archives(self, archive_name='tip'):
291
292
292 for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
293 for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
293 yield {"type": i[0], "extension": i[1], "node": archive_name}
294 yield {"type": i[0], "extension": i[1], "node": archive_name}
294
295
295 def _get_url(self, url):
296 def _get_url(self, url):
296 """
297 """
297 Returns normalized url. If schema is not given, would fall to
298 Returns normalized url. If schema is not given, would fall to
298 filesystem (``file:///``) schema.
299 filesystem (``file:///``) schema.
299 """
300 """
300 url = str(url)
301 url = str(url)
301 if url != 'default' and not '://' in url:
302 if url != 'default' and not '://' in url:
302 url = ':///'.join(('file', url))
303 url = ':///'.join(('file', url))
303 return url
304 return url
304
305
305 def get_hook_location(self):
306 def get_hook_location(self):
306 """
307 """
307 returns absolute path to location where hooks are stored
308 returns absolute path to location where hooks are stored
308 """
309 """
309 loc = os.path.join(self.path, 'hooks')
310 loc = os.path.join(self.path, 'hooks')
310 if not self.bare:
311 if not self.bare:
311 loc = os.path.join(self.path, '.git', 'hooks')
312 loc = os.path.join(self.path, '.git', 'hooks')
312 return loc
313 return loc
313
314
314 @LazyProperty
315 @LazyProperty
315 def name(self):
316 def name(self):
316 return os.path.basename(self.path)
317 return os.path.basename(self.path)
317
318
318 @LazyProperty
319 @LazyProperty
319 def last_change(self):
320 def last_change(self):
320 """
321 """
321 Returns last change made on this repository as datetime object
322 Returns last change made on this repository as datetime object
322 """
323 """
323 return date_fromtimestamp(self._get_mtime(), makedate()[1])
324 return date_fromtimestamp(self._get_mtime(), makedate()[1])
324
325
325 def _get_mtime(self):
326 def _get_mtime(self):
326 try:
327 try:
327 return time.mktime(self.get_changeset().date.timetuple())
328 return time.mktime(self.get_changeset().date.timetuple())
328 except RepositoryError:
329 except RepositoryError:
329 idx_loc = '' if self.bare else '.git'
330 idx_loc = '' if self.bare else '.git'
330 # fallback to filesystem
331 # fallback to filesystem
331 in_path = os.path.join(self.path, idx_loc, "index")
332 in_path = os.path.join(self.path, idx_loc, "index")
332 he_path = os.path.join(self.path, idx_loc, "HEAD")
333 he_path = os.path.join(self.path, idx_loc, "HEAD")
333 if os.path.exists(in_path):
334 if os.path.exists(in_path):
334 return os.stat(in_path).st_mtime
335 return os.stat(in_path).st_mtime
335 else:
336 else:
336 return os.stat(he_path).st_mtime
337 return os.stat(he_path).st_mtime
337
338
338 @LazyProperty
339 @LazyProperty
339 def description(self):
340 def description(self):
340 idx_loc = '' if self.bare else '.git'
341 idx_loc = '' if self.bare else '.git'
341 undefined_description = u'unknown'
342 undefined_description = u'unknown'
342 description_path = os.path.join(self.path, idx_loc, 'description')
343 description_path = os.path.join(self.path, idx_loc, 'description')
343 if os.path.isfile(description_path):
344 if os.path.isfile(description_path):
344 return safe_unicode(open(description_path).read())
345 return safe_unicode(open(description_path).read())
345 else:
346 else:
346 return undefined_description
347 return undefined_description
347
348
348 @LazyProperty
349 @LazyProperty
349 def contact(self):
350 def contact(self):
350 undefined_contact = u'Unknown'
351 undefined_contact = u'Unknown'
351 return undefined_contact
352 return undefined_contact
352
353
353 @property
354 @property
354 def branches(self):
355 def branches(self):
355 if not self.revisions:
356 if not self.revisions:
356 return {}
357 return {}
357 sortkey = lambda ctx: ctx[0]
358 sortkey = lambda ctx: ctx[0]
358 _branches = [(x[0], x[1][0])
359 _branches = [(x[0], x[1][0])
359 for x in self._parsed_refs.iteritems() if x[1][1] == 'H']
360 for x in self._parsed_refs.iteritems() if x[1][1] == 'H']
360 return OrderedDict(sorted(_branches, key=sortkey, reverse=False))
361 return OrderedDict(sorted(_branches, key=sortkey, reverse=False))
361
362
362 @LazyProperty
363 @LazyProperty
363 def tags(self):
364 def tags(self):
364 return self._get_tags()
365 return self._get_tags()
365
366
366 def _get_tags(self):
367 def _get_tags(self):
367 if not self.revisions:
368 if not self.revisions:
368 return {}
369 return {}
369
370
370 sortkey = lambda ctx: ctx[0]
371 sortkey = lambda ctx: ctx[0]
371 _tags = [(x[0], x[1][0])
372 _tags = [(x[0], x[1][0])
372 for x in self._parsed_refs.iteritems() if x[1][1] == 'T']
373 for x in self._parsed_refs.iteritems() if x[1][1] == 'T']
373 return OrderedDict(sorted(_tags, key=sortkey, reverse=True))
374 return OrderedDict(sorted(_tags, key=sortkey, reverse=True))
374
375
375 def tag(self, name, user, revision=None, message=None, date=None,
376 def tag(self, name, user, revision=None, message=None, date=None,
376 **kwargs):
377 **kwargs):
377 """
378 """
378 Creates and returns a tag for the given ``revision``.
379 Creates and returns a tag for the given ``revision``.
379
380
380 :param name: name for new tag
381 :param name: name for new tag
381 :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
382 :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
382 :param revision: changeset id for which new tag would be created
383 :param revision: changeset id for which new tag would be created
383 :param message: message of the tag's commit
384 :param message: message of the tag's commit
384 :param date: date of tag's commit
385 :param date: date of tag's commit
385
386
386 :raises TagAlreadyExistError: if tag with same name already exists
387 :raises TagAlreadyExistError: if tag with same name already exists
387 """
388 """
388 if name in self.tags:
389 if name in self.tags:
389 raise TagAlreadyExistError("Tag %s already exists" % name)
390 raise TagAlreadyExistError("Tag %s already exists" % name)
390 changeset = self.get_changeset(revision)
391 changeset = self.get_changeset(revision)
391 message = message or "Added tag %s for commit %s" % (name,
392 message = message or "Added tag %s for commit %s" % (name,
392 changeset.raw_id)
393 changeset.raw_id)
393 self._repo.refs["refs/tags/%s" % name] = changeset._commit.id
394 self._repo.refs["refs/tags/%s" % name] = changeset._commit.id
394
395
395 self._parsed_refs = self._get_parsed_refs()
396 self._parsed_refs = self._get_parsed_refs()
396 self.tags = self._get_tags()
397 self.tags = self._get_tags()
397 return changeset
398 return changeset
398
399
399 def remove_tag(self, name, user, message=None, date=None):
400 def remove_tag(self, name, user, message=None, date=None):
400 """
401 """
401 Removes tag with the given ``name``.
402 Removes tag with the given ``name``.
402
403
403 :param name: name of the tag to be removed
404 :param name: name of the tag to be removed
404 :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
405 :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
405 :param message: message of the tag's removal commit
406 :param message: message of the tag's removal commit
406 :param date: date of tag's removal commit
407 :param date: date of tag's removal commit
407
408
408 :raises TagDoesNotExistError: if tag with given name does not exists
409 :raises TagDoesNotExistError: if tag with given name does not exists
409 """
410 """
410 if name not in self.tags:
411 if name not in self.tags:
411 raise TagDoesNotExistError("Tag %s does not exist" % name)
412 raise TagDoesNotExistError("Tag %s does not exist" % name)
412 tagpath = posixpath.join(self._repo.refs.path, 'refs', 'tags', name)
413 tagpath = posixpath.join(self._repo.refs.path, 'refs', 'tags', name)
413 try:
414 try:
414 os.remove(tagpath)
415 os.remove(tagpath)
415 self._parsed_refs = self._get_parsed_refs()
416 self._parsed_refs = self._get_parsed_refs()
416 self.tags = self._get_tags()
417 self.tags = self._get_tags()
417 except OSError, e:
418 except OSError, e:
418 raise RepositoryError(e.strerror)
419 raise RepositoryError(e.strerror)
419
420
420 @LazyProperty
421 @LazyProperty
421 def _parsed_refs(self):
422 def _parsed_refs(self):
422 return self._get_parsed_refs()
423 return self._get_parsed_refs()
423
424
424 def _get_parsed_refs(self):
425 def _get_parsed_refs(self):
425 # cache the property
426 # cache the property
426 _repo = self._repo
427 _repo = self._repo
427 refs = _repo.get_refs()
428 refs = _repo.get_refs()
428 keys = [('refs/heads/', 'H'),
429 keys = [('refs/heads/', 'H'),
429 ('refs/remotes/origin/', 'RH'),
430 ('refs/remotes/origin/', 'RH'),
430 ('refs/tags/', 'T')]
431 ('refs/tags/', 'T')]
431 _refs = {}
432 _refs = {}
432 for ref, sha in refs.iteritems():
433 for ref, sha in refs.iteritems():
433 for k, type_ in keys:
434 for k, type_ in keys:
434 if ref.startswith(k):
435 if ref.startswith(k):
435 _key = ref[len(k):]
436 _key = ref[len(k):]
436 if type_ == 'T':
437 if type_ == 'T':
437 obj = _repo.get_object(sha)
438 obj = _repo.get_object(sha)
438 if isinstance(obj, Tag):
439 if isinstance(obj, Tag):
439 sha = _repo.get_object(sha).object[1]
440 sha = _repo.get_object(sha).object[1]
440 _refs[_key] = [sha, type_]
441 _refs[_key] = [sha, type_]
441 break
442 break
442 return _refs
443 return _refs
443
444
444 def _heads(self, reverse=False):
445 def _heads(self, reverse=False):
445 refs = self._repo.get_refs()
446 refs = self._repo.get_refs()
446 heads = {}
447 heads = {}
447
448
448 for key, val in refs.items():
449 for key, val in refs.items():
449 for ref_key in ['refs/heads/', 'refs/remotes/origin/']:
450 for ref_key in ['refs/heads/', 'refs/remotes/origin/']:
450 if key.startswith(ref_key):
451 if key.startswith(ref_key):
451 n = key[len(ref_key):]
452 n = key[len(ref_key):]
452 if n not in ['HEAD']:
453 if n not in ['HEAD']:
453 heads[n] = val
454 heads[n] = val
454
455
455 return heads if reverse else dict((y, x) for x, y in heads.iteritems())
456 return heads if reverse else dict((y, x) for x, y in heads.iteritems())
456
457
457 def get_changeset(self, revision=None):
458 def get_changeset(self, revision=None):
458 """
459 """
459 Returns ``GitChangeset`` object representing commit from git repository
460 Returns ``GitChangeset`` object representing commit from git repository
460 at the given revision or head (most recent commit) if None given.
461 at the given revision or head (most recent commit) if None given.
461 """
462 """
462 if isinstance(revision, GitChangeset):
463 if isinstance(revision, GitChangeset):
463 return revision
464 return revision
464 revision = self._get_revision(revision)
465 revision = self._get_revision(revision)
465 changeset = GitChangeset(repository=self, revision=revision)
466 changeset = GitChangeset(repository=self, revision=revision)
466 return changeset
467 return changeset
467
468
468 def get_changesets(self, start=None, end=None, start_date=None,
469 def get_changesets(self, start=None, end=None, start_date=None,
469 end_date=None, branch_name=None, reverse=False):
470 end_date=None, branch_name=None, reverse=False):
470 """
471 """
471 Returns iterator of ``GitChangeset`` objects from start to end (both
472 Returns iterator of ``GitChangeset`` objects from start to end (both
472 are inclusive), in ascending date order (unless ``reverse`` is set).
473 are inclusive), in ascending date order (unless ``reverse`` is set).
473
474
474 :param start: changeset ID, as str; first returned changeset
475 :param start: changeset ID, as str; first returned changeset
475 :param end: changeset ID, as str; last returned changeset
476 :param end: changeset ID, as str; last returned changeset
476 :param start_date: if specified, changesets with commit date less than
477 :param start_date: if specified, changesets with commit date less than
477 ``start_date`` would be filtered out from returned set
478 ``start_date`` would be filtered out from returned set
478 :param end_date: if specified, changesets with commit date greater than
479 :param end_date: if specified, changesets with commit date greater than
479 ``end_date`` would be filtered out from returned set
480 ``end_date`` would be filtered out from returned set
480 :param branch_name: if specified, changesets not reachable from given
481 :param branch_name: if specified, changesets not reachable from given
481 branch would be filtered out from returned set
482 branch would be filtered out from returned set
482 :param reverse: if ``True``, returned generator would be reversed
483 :param reverse: if ``True``, returned generator would be reversed
483 (meaning that returned changesets would have descending date order)
484 (meaning that returned changesets would have descending date order)
484
485
485 :raise BranchDoesNotExistError: If given ``branch_name`` does not
486 :raise BranchDoesNotExistError: If given ``branch_name`` does not
486 exist.
487 exist.
487 :raise ChangesetDoesNotExistError: If changeset for given ``start`` or
488 :raise ChangesetDoesNotExistError: If changeset for given ``start`` or
488 ``end`` could not be found.
489 ``end`` could not be found.
489
490
490 """
491 """
491 if branch_name and branch_name not in self.branches:
492 if branch_name and branch_name not in self.branches:
492 raise BranchDoesNotExistError("Branch '%s' not found" \
493 raise BranchDoesNotExistError("Branch '%s' not found" \
493 % branch_name)
494 % branch_name)
494 # %H at format means (full) commit hash, initial hashes are retrieved
495 # %H at format means (full) commit hash, initial hashes are retrieved
495 # in ascending date order
496 # in ascending date order
496 cmd_template = 'log --date-order --reverse --pretty=format:"%H"'
497 cmd_template = 'log --date-order --reverse --pretty=format:"%H"'
497 cmd_params = {}
498 cmd_params = {}
498 if start_date:
499 if start_date:
499 cmd_template += ' --since "$since"'
500 cmd_template += ' --since "$since"'
500 cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S')
501 cmd_params['since'] = start_date.strftime('%m/%d/%y %H:%M:%S')
501 if end_date:
502 if end_date:
502 cmd_template += ' --until "$until"'
503 cmd_template += ' --until "$until"'
503 cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S')
504 cmd_params['until'] = end_date.strftime('%m/%d/%y %H:%M:%S')
504 if branch_name:
505 if branch_name:
505 cmd_template += ' $branch_name'
506 cmd_template += ' $branch_name'
506 cmd_params['branch_name'] = branch_name
507 cmd_params['branch_name'] = branch_name
507 else:
508 else:
508 rev_filter = _git_path = settings.GIT_REV_FILTER
509 rev_filter = _git_path = settings.GIT_REV_FILTER
509 cmd_template += ' %s' % (rev_filter)
510 cmd_template += ' %s' % (rev_filter)
510
511
511 cmd = string.Template(cmd_template).safe_substitute(**cmd_params)
512 cmd = string.Template(cmd_template).safe_substitute(**cmd_params)
512 revs = self.run_git_command(cmd)[0].splitlines()
513 revs = self.run_git_command(cmd)[0].splitlines()
513 start_pos = 0
514 start_pos = 0
514 end_pos = len(revs)
515 end_pos = len(revs)
515 if start:
516 if start:
516 _start = self._get_revision(start)
517 _start = self._get_revision(start)
517 try:
518 try:
518 start_pos = revs.index(_start)
519 start_pos = revs.index(_start)
519 except ValueError:
520 except ValueError:
520 pass
521 pass
521
522
522 if end is not None:
523 if end is not None:
523 _end = self._get_revision(end)
524 _end = self._get_revision(end)
524 try:
525 try:
525 end_pos = revs.index(_end)
526 end_pos = revs.index(_end)
526 except ValueError:
527 except ValueError:
527 pass
528 pass
528
529
529 if None not in [start, end] and start_pos > end_pos:
530 if None not in [start, end] and start_pos > end_pos:
530 raise RepositoryError('start cannot be after end')
531 raise RepositoryError('start cannot be after end')
531
532
532 if end_pos is not None:
533 if end_pos is not None:
533 end_pos += 1
534 end_pos += 1
534
535
535 revs = revs[start_pos:end_pos]
536 revs = revs[start_pos:end_pos]
536 if reverse:
537 if reverse:
537 revs = reversed(revs)
538 revs = reversed(revs)
538 return CollectionGenerator(self, revs)
539 return CollectionGenerator(self, revs)
539
540
540 def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
541 def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
541 context=3):
542 context=3):
542 """
543 """
543 Returns (git like) *diff*, as plain text. Shows changes introduced by
544 Returns (git like) *diff*, as plain text. Shows changes introduced by
544 ``rev2`` since ``rev1``.
545 ``rev2`` since ``rev1``.
545
546
546 :param rev1: Entry point from which diff is shown. Can be
547 :param rev1: Entry point from which diff is shown. Can be
547 ``self.EMPTY_CHANGESET`` - in this case, patch showing all
548 ``self.EMPTY_CHANGESET`` - in this case, patch showing all
548 the changes since empty state of the repository until ``rev2``
549 the changes since empty state of the repository until ``rev2``
549 :param rev2: Until which revision changes should be shown.
550 :param rev2: Until which revision changes should be shown.
550 :param ignore_whitespace: If set to ``True``, would not show whitespace
551 :param ignore_whitespace: If set to ``True``, would not show whitespace
551 changes. Defaults to ``False``.
552 changes. Defaults to ``False``.
552 :param context: How many lines before/after changed lines should be
553 :param context: How many lines before/after changed lines should be
553 shown. Defaults to ``3``.
554 shown. Defaults to ``3``.
554 """
555 """
555 flags = ['-U%s' % context, '--full-index', '--binary', '-p', '-M', '--abbrev=40']
556 flags = ['-U%s' % context, '--full-index', '--binary', '-p', '-M', '--abbrev=40']
556 if ignore_whitespace:
557 if ignore_whitespace:
557 flags.append('-w')
558 flags.append('-w')
558
559
559 if hasattr(rev1, 'raw_id'):
560 if hasattr(rev1, 'raw_id'):
560 rev1 = getattr(rev1, 'raw_id')
561 rev1 = getattr(rev1, 'raw_id')
561
562
562 if hasattr(rev2, 'raw_id'):
563 if hasattr(rev2, 'raw_id'):
563 rev2 = getattr(rev2, 'raw_id')
564 rev2 = getattr(rev2, 'raw_id')
564
565
565 if rev1 == self.EMPTY_CHANGESET:
566 if rev1 == self.EMPTY_CHANGESET:
566 rev2 = self.get_changeset(rev2).raw_id
567 rev2 = self.get_changeset(rev2).raw_id
567 cmd = ' '.join(['show'] + flags + [rev2])
568 cmd = ' '.join(['show'] + flags + [rev2])
568 else:
569 else:
569 rev1 = self.get_changeset(rev1).raw_id
570 rev1 = self.get_changeset(rev1).raw_id
570 rev2 = self.get_changeset(rev2).raw_id
571 rev2 = self.get_changeset(rev2).raw_id
571 cmd = ' '.join(['diff'] + flags + [rev1, rev2])
572 cmd = ' '.join(['diff'] + flags + [rev1, rev2])
572
573
573 if path:
574 if path:
574 cmd += ' -- "%s"' % path
575 cmd += ' -- "%s"' % path
575
576
576 stdout, stderr = self.run_git_command(cmd)
577 stdout, stderr = self.run_git_command(cmd)
577 # If we used 'show' command, strip first few lines (until actual diff
578 # If we used 'show' command, strip first few lines (until actual diff
578 # starts)
579 # starts)
579 if rev1 == self.EMPTY_CHANGESET:
580 if rev1 == self.EMPTY_CHANGESET:
580 lines = stdout.splitlines()
581 lines = stdout.splitlines()
581 x = 0
582 x = 0
582 for line in lines:
583 for line in lines:
583 if line.startswith('diff'):
584 if line.startswith('diff'):
584 break
585 break
585 x += 1
586 x += 1
586 # Append new line just like 'diff' command do
587 # Append new line just like 'diff' command do
587 stdout = '\n'.join(lines[x:]) + '\n'
588 stdout = '\n'.join(lines[x:]) + '\n'
588 return stdout
589 return stdout
589
590
590 @LazyProperty
591 @LazyProperty
591 def in_memory_changeset(self):
592 def in_memory_changeset(self):
592 """
593 """
593 Returns ``GitInMemoryChangeset`` object for this repository.
594 Returns ``GitInMemoryChangeset`` object for this repository.
594 """
595 """
595 return GitInMemoryChangeset(self)
596 return GitInMemoryChangeset(self)
596
597
597 def clone(self, url, update_after_clone=True, bare=False):
598 def clone(self, url, update_after_clone=True, bare=False):
598 """
599 """
599 Tries to clone changes from external location.
600 Tries to clone changes from external location.
600
601
601 :param update_after_clone: If set to ``False``, git won't checkout
602 :param update_after_clone: If set to ``False``, git won't checkout
602 working directory
603 working directory
603 :param bare: If set to ``True``, repository would be cloned into
604 :param bare: If set to ``True``, repository would be cloned into
604 *bare* git repository (no working directory at all).
605 *bare* git repository (no working directory at all).
605 """
606 """
606 url = self._get_url(url)
607 url = self._get_url(url)
607 cmd = ['clone']
608 cmd = ['clone']
608 if bare:
609 if bare:
609 cmd.append('--bare')
610 cmd.append('--bare')
610 elif not update_after_clone:
611 elif not update_after_clone:
611 cmd.append('--no-checkout')
612 cmd.append('--no-checkout')
612 cmd += ['--', '"%s"' % url, '"%s"' % self.path]
613 cmd += ['--', '"%s"' % url, '"%s"' % self.path]
613 cmd = ' '.join(cmd)
614 cmd = ' '.join(cmd)
614 # If error occurs run_git_command raises RepositoryError already
615 # If error occurs run_git_command raises RepositoryError already
615 self.run_git_command(cmd)
616 self.run_git_command(cmd)
616
617
617 def pull(self, url):
618 def pull(self, url):
618 """
619 """
619 Tries to pull changes from external location.
620 Tries to pull changes from external location.
620 """
621 """
621 url = self._get_url(url)
622 url = self._get_url(url)
622 cmd = ['pull']
623 cmd = ['pull']
623 cmd.append("--ff-only")
624 cmd.append("--ff-only")
624 cmd.append(url)
625 cmd.append(url)
625 cmd = ' '.join(cmd)
626 cmd = ' '.join(cmd)
626 # If error occurs run_git_command raises RepositoryError already
627 # If error occurs run_git_command raises RepositoryError already
627 self.run_git_command(cmd)
628 self.run_git_command(cmd)
628
629
629 def fetch(self, url):
630 def fetch(self, url):
630 """
631 """
631 Tries to pull changes from external location.
632 Tries to pull changes from external location.
632 """
633 """
633 url = self._get_url(url)
634 url = self._get_url(url)
634 so, se = self.run_git_command('ls-remote -h %s' % url)
635 so, se = self.run_git_command('ls-remote -h %s' % url)
635 refs = []
636 refs = []
636 for line in (x for x in so.splitlines()):
637 for line in (x for x in so.splitlines()):
637 sha, ref = line.split('\t')
638 sha, ref = line.split('\t')
638 refs.append(ref)
639 refs.append(ref)
639 refs = ' '.join(('+%s:%s' % (r, r) for r in refs))
640 refs = ' '.join(('+%s:%s' % (r, r) for r in refs))
640 cmd = '''fetch %s -- %s''' % (url, refs)
641 cmd = '''fetch %s -- %s''' % (url, refs)
641 self.run_git_command(cmd)
642 self.run_git_command(cmd)
642
643
643 @LazyProperty
644 @LazyProperty
644 def workdir(self):
645 def workdir(self):
645 """
646 """
646 Returns ``Workdir`` instance for this repository.
647 Returns ``Workdir`` instance for this repository.
647 """
648 """
648 return GitWorkdir(self)
649 return GitWorkdir(self)
649
650
650 def get_config_value(self, section, name, config_file=None):
651 def get_config_value(self, section, name, config_file=None):
651 """
652 """
652 Returns configuration value for a given [``section``] and ``name``.
653 Returns configuration value for a given [``section``] and ``name``.
653
654
654 :param section: Section we want to retrieve value from
655 :param section: Section we want to retrieve value from
655 :param name: Name of configuration we want to retrieve
656 :param name: Name of configuration we want to retrieve
656 :param config_file: A path to file which should be used to retrieve
657 :param config_file: A path to file which should be used to retrieve
657 configuration from (might also be a list of file paths)
658 configuration from (might also be a list of file paths)
658 """
659 """
659 if config_file is None:
660 if config_file is None:
660 config_file = []
661 config_file = []
661 elif isinstance(config_file, basestring):
662 elif isinstance(config_file, basestring):
662 config_file = [config_file]
663 config_file = [config_file]
663
664
664 def gen_configs():
665 def gen_configs():
665 for path in config_file + self._config_files:
666 for path in config_file + self._config_files:
666 try:
667 try:
667 yield ConfigFile.from_path(path)
668 yield ConfigFile.from_path(path)
668 except (IOError, OSError, ValueError):
669 except (IOError, OSError, ValueError):
669 continue
670 continue
670
671
671 for config in gen_configs():
672 for config in gen_configs():
672 try:
673 try:
673 return config.get(section, name)
674 return config.get(section, name)
674 except KeyError:
675 except KeyError:
675 continue
676 continue
676 return None
677 return None
677
678
678 def get_user_name(self, config_file=None):
679 def get_user_name(self, config_file=None):
679 """
680 """
680 Returns user's name from global configuration file.
681 Returns user's name from global configuration file.
681
682
682 :param config_file: A path to file which should be used to retrieve
683 :param config_file: A path to file which should be used to retrieve
683 configuration from (might also be a list of file paths)
684 configuration from (might also be a list of file paths)
684 """
685 """
685 return self.get_config_value('user', 'name', config_file)
686 return self.get_config_value('user', 'name', config_file)
686
687
687 def get_user_email(self, config_file=None):
688 def get_user_email(self, config_file=None):
688 """
689 """
689 Returns user's email from global configuration file.
690 Returns user's email from global configuration file.
690
691
691 :param config_file: A path to file which should be used to retrieve
692 :param config_file: A path to file which should be used to retrieve
692 configuration from (might also be a list of file paths)
693 configuration from (might also be a list of file paths)
693 """
694 """
694 return self.get_config_value('user', 'email', config_file)
695 return self.get_config_value('user', 'email', config_file)
@@ -1,415 +1,415 b''
1 '''
1 '''
2 Module provides a class allowing to wrap communication over subprocess.Popen
2 Module provides a class allowing to wrap communication over subprocess.Popen
3 input, output, error streams into a meaningfull, non-blocking, concurrent
3 input, output, error streams into a meaningfull, non-blocking, concurrent
4 stream processor exposing the output data as an iterator fitting to be a
4 stream processor exposing the output data as an iterator fitting to be a
5 return value passed by a WSGI applicaiton to a WSGI server per PEP 3333.
5 return value passed by a WSGI applicaiton to a WSGI server per PEP 3333.
6
6
7 Copyright (c) 2011 Daniel Dotsenko <dotsa@hotmail.com>
7 Copyright (c) 2011 Daniel Dotsenko <dotsa@hotmail.com>
8
8
9 This file is part of git_http_backend.py Project.
9 This file is part of git_http_backend.py Project.
10
10
11 git_http_backend.py Project is free software: you can redistribute it and/or
11 git_http_backend.py Project is free software: you can redistribute it and/or
12 modify it under the terms of the GNU Lesser General Public License as
12 modify it under the terms of the GNU Lesser General Public License as
13 published by the Free Software Foundation, either version 2.1 of the License,
13 published by the Free Software Foundation, either version 2.1 of the License,
14 or (at your option) any later version.
14 or (at your option) any later version.
15
15
16 git_http_backend.py Project is distributed in the hope that it will be useful,
16 git_http_backend.py Project is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU Lesser General Public License for more details.
19 GNU Lesser General Public License for more details.
20
20
21 You should have received a copy of the GNU Lesser General Public License
21 You should have received a copy of the GNU Lesser General Public License
22 along with git_http_backend.py Project.
22 along with git_http_backend.py Project.
23 If not, see <http://www.gnu.org/licenses/>.
23 If not, see <http://www.gnu.org/licenses/>.
24 '''
24 '''
25 import os
25 import os
26 import subprocess
26 import subprocess
27 from rhodecode.lib.vcs.utils.compat import deque, Event, Thread, _bytes, _bytearray
27 from rhodecode.lib.vcs.utils.compat import deque, Event, Thread, _bytes, _bytearray
28
28
29
29
class StreamFeeder(Thread):
    """
    Background thread that pumps data from a string-like, file-like or
    raw file-descriptor source into the write end of an ``os.pipe()``,
    so filling the pipe's buffer never blocks the main thread.
    The write end is closed once the source is exhausted.
    """
    def __init__(self, source):
        super(StreamFeeder, self).__init__()
        self.daemon = True
        readable = False
        self.bytes = _bytes()
        if type(source) in (type(''), _bytes, _bytearray):  # string-like
            self.bytes = _bytes(source)
        else:  # can be either file pointer or file-like
            if type(source) in (int, long):  # file pointer it is
                # wrap the raw descriptor (int) into a buffered file-like
                try:
                    source = os.fdopen(source, 'rb', 16384)
                except Exception:
                    pass
            # duck-type check: is the source readable by now?
            try:
                readable = source.read
            except Exception:
                pass
            if not readable and not self.bytes:
                raise TypeError("StreamFeeder's source object must be a readable "
                                "file-like, a file descriptor, or a string-like.")
        self.source = source
        self.readiface, self.writeiface = os.pipe()

    def run(self):
        wfd = self.writeiface
        if self.bytes:
            # payload already fully in memory - push it in one write
            os.write(wfd, self.bytes)
        else:
            src = self.source
            while True:
                chunk = src.read(4096)
                if not chunk:
                    break
                os.write(wfd, chunk)
        # closing the write end signals EOF to the pipe's reader
        os.close(wfd)

    @property
    def output(self):
        # read end of the pipe - hand this to the consumer (e.g. a
        # subprocess' stdin)
        return self.readiface
77
77
78
78
class InputStreamChunker(Thread):
    """
    Reader thread: pulls fixed-size chunks from a blocking ``source``
    stream and appends them to the shared ``target`` container.  It
    pauses itself (via the ``keep_reading`` event) once the consumer's
    buffer holds more than ``chunk_count_max`` chunks, and raises the
    ``EOF`` event when the source is exhausted.
    """
    def __init__(self, source, target, buffer_size, chunk_size):
        super(InputStreamChunker, self).__init__()

        self.daemon = True  # die die die.

        self.source = source
        self.target = target
        self.chunk_count_max = int(buffer_size / chunk_size) + 1
        self.chunk_size = chunk_size

        # signalled every time a chunk lands in ``target``
        self.data_added = Event()
        self.data_added.clear()

        # cleared by us when the buffer is full; the consumer sets it
        # back once it has drained some data
        self.keep_reading = Event()
        self.keep_reading.set()

        # raised once the source stream is exhausted
        self.EOF = Event()
        self.EOF.clear()

        # master run switch; cleared by stop()
        self.go = Event()
        self.go.set()

    def stop(self):
        self.go.clear()
        self.EOF.set()
        try:
            # this is not proper, but is done to force the reader thread let
            # go of the input because, if successful, .close() will send EOF
            # down the pipe.
            self.source.close()
        except:
            pass

    def run(self):
        src = self.source
        sink = self.target
        chunk_size = self.chunk_size
        max_chunks = self.chunk_count_max
        keep_reading = self.keep_reading
        data_added = self.data_added
        running = self.go

        try:
            chunk = src.read(chunk_size)
        except ValueError:
            chunk = ''

        while chunk and running.is_set():
            if len(sink) > max_chunks:
                # buffer full - pause until the consumer drains it
                keep_reading.clear()
                keep_reading.wait(2)
                # # this only works on 2.7.x and up
                # if not kr.wait(10):
                #     raise Exception("Timed out while waiting for input to be read.")
                # instead we'll use this
                if len(sink) > max_chunks + 3:
                    raise IOError("Timed out while waiting for input from subprocess.")
            sink.append(chunk)
            data_added.set()
            chunk = src.read(chunk_size)
        self.EOF.set()
        data_added.set()  # for cases when done but there was no input.
143
143
144
144
class BufferedGenerator():
    '''
    Class behaves as a non-blocking, buffered pipe reader.
    Reads chunks of data (through a thread)
    from a blocking pipe, and attaches these to an array (Deque) of chunks.
    Reading is halted in the thread when max chunks is internally buffered.
    The .next() may operate in blocking or non-blocking fashion by yielding
    '' if no data is ready
    to be sent or by not returning until there is some data to send
    When we get EOF from underlying source pipe we raise the marker to raise
    StopIteration after the last chunk of data is yielded.
    '''

    def __init__(self, source, buffer_size=65536, chunk_size=4096,
                 starting_values=None, bottomless=False):
        '''
        :param source: blocking readable stream to consume from
        :param buffer_size: total buffer size in bytes kept in ``self.data``
        :param chunk_size: max size of a single read chunk
        :param starting_values: optional chunks pre-placed in the queue
        :param bottomless: when True the deque is bounded and silently
          drops oldest chunks (used for stderr collection)
        '''
        # FIX: default was a shared mutable ``[]`` - a mutable default
        # argument is evaluated once and shared between all calls. Using
        # a None sentinel is backward-compatible for every caller.
        if starting_values is None:
            starting_values = []

        if bottomless:
            maxlen = int(buffer_size / chunk_size)
        else:
            maxlen = None

        self.data = deque(starting_values, maxlen)

        self.worker = InputStreamChunker(source, self.data, buffer_size,
                                         chunk_size)
        if starting_values:
            # pretend data has already arrived so .next() returns at once
            self.worker.data_added.set()
        self.worker.start()

    ####################
    # Generator's methods
    ####################

    def __iter__(self):
        return self

    def next(self):
        # wait (in 0.2s slices) until either data shows up or EOF is hit
        while not len(self.data) and not self.worker.EOF.is_set():
            self.worker.data_added.clear()
            self.worker.data_added.wait(0.2)
        if len(self.data):
            # buffer was drained a bit - let the reader thread resume
            self.worker.keep_reading.set()
            return _bytes(self.data.popleft())
        elif self.worker.EOF.is_set():
            raise StopIteration

    def throw(self, type, value=None, traceback=None):
        # generator-protocol compatibility; only raises while the source
        # is still being read
        if not self.worker.EOF.is_set():
            raise type(value)

    def start(self):
        self.worker.start()

    def stop(self):
        self.worker.stop()

    def close(self):
        try:
            self.worker.stop()
            self.throw(GeneratorExit)
        except (GeneratorExit, StopIteration):
            pass

    def __del__(self):
        self.close()

    ####################
    # Threaded reader's infrastructure.
    ####################
    @property
    def input(self):
        # NOTE(review): InputStreamChunker defines no ``w`` attribute,
        # so this property looks dead/broken - confirm before relying
        # on it.
        return self.worker.w

    @property
    def data_added_event(self):
        return self.worker.data_added

    @property
    def data_added(self):
        return self.worker.data_added.is_set()

    @property
    def reading_paused(self):
        return not self.worker.keep_reading.is_set()

    @property
    def done_reading_event(self):
        '''
        Done_reding does not mean that the iterator's buffer is empty.
        Iterator might have done reading from underlying source, but the read
        chunks might still be available for serving through .next() method.

        @return An Event class instance.
        '''
        return self.worker.EOF

    @property
    def done_reading(self):
        '''
        Done_reding does not mean that the iterator's buffer is empty.
        Iterator might have done reading from underlying source, but the read
        chunks might still be available for serving through .next() method.

        @return An Bool value.
        '''
        return self.worker.EOF.is_set()

    @property
    def length(self):
        '''
        returns int.

        This is the lenght of the que of chunks, not the length of
        the combined contents in those chunks.

        __len__() cannot be meaningfully implemented because this
        reader is just flying throuh a bottomless pit content and
        can only know the lenght of what it already saw.

        If __len__() on WSGI server per PEP 3333 returns a value,
        the responce's length will be set to that. In order not to
        confuse WSGI PEP3333 servers, we will not implement __len__
        at all.
        '''
        return len(self.data)

    def prepend(self, x):
        self.data.appendleft(x)

    def append(self, x):
        self.data.append(x)

    def extend(self, o):
        self.data.extend(o)

    def __getitem__(self, i):
        return self.data[i]
281 return self.data[i]
282
282
283
283
284 class SubprocessIOChunker(object):
284 class SubprocessIOChunker(object):
285 '''
285 '''
286 Processor class wrapping handling of subprocess IO.
286 Processor class wrapping handling of subprocess IO.
287
287
288 In a way, this is a "communicate()" replacement with a twist.
288 In a way, this is a "communicate()" replacement with a twist.
289
289
290 - We are multithreaded. Writing in and reading out, err are all sep threads.
290 - We are multithreaded. Writing in and reading out, err are all sep threads.
291 - We support concurrent (in and out) stream processing.
291 - We support concurrent (in and out) stream processing.
292 - The output is not a stream. It's a queue of read string (bytes, not unicode)
292 - The output is not a stream. It's a queue of read string (bytes, not unicode)
293 chunks. The object behaves as an iterable. You can "for chunk in obj:" us.
293 chunks. The object behaves as an iterable. You can "for chunk in obj:" us.
294 - We are non-blocking in more respects than communicate()
294 - We are non-blocking in more respects than communicate()
295 (reading from subprocess out pauses when internal buffer is full, but
295 (reading from subprocess out pauses when internal buffer is full, but
296 does not block the parent calling code. On the flip side, reading from
296 does not block the parent calling code. On the flip side, reading from
297 slow-yielding subprocess may block the iteration until data shows up. This
297 slow-yielding subprocess may block the iteration until data shows up. This
298 does not block the parallel inpipe reading occurring parallel thread.)
298 does not block the parallel inpipe reading occurring parallel thread.)
299
299
300 The purpose of the object is to allow us to wrap subprocess interactions into
300 The purpose of the object is to allow us to wrap subprocess interactions into
301 and interable that can be passed to a WSGI server as the application's return
301 and interable that can be passed to a WSGI server as the application's return
302 value. Because of stream-processing-ability, WSGI does not have to read ALL
302 value. Because of stream-processing-ability, WSGI does not have to read ALL
303 of the subprocess's output and buffer it, before handing it to WSGI server for
303 of the subprocess's output and buffer it, before handing it to WSGI server for
304 HTTP response. Instead, the class initializer reads just a bit of the stream
304 HTTP response. Instead, the class initializer reads just a bit of the stream
305 to figure out if error ocurred or likely to occur and if not, just hands the
305 to figure out if error ocurred or likely to occur and if not, just hands the
306 further iteration over subprocess output to the server for completion of HTTP
306 further iteration over subprocess output to the server for completion of HTTP
307 response.
307 response.
308
308
309 The real or perceived subprocess error is trapped and raised as one of
309 The real or perceived subprocess error is trapped and raised as one of
310 EnvironmentError family of exceptions
310 EnvironmentError family of exceptions
311
311
312 Example usage:
312 Example usage:
313 # try:
313 # try:
314 # answer = SubprocessIOChunker(
314 # answer = SubprocessIOChunker(
315 # cmd,
315 # cmd,
316 # input,
316 # input,
317 # buffer_size = 65536,
317 # buffer_size = 65536,
318 # chunk_size = 4096
318 # chunk_size = 4096
319 # )
319 # )
320 # except (EnvironmentError) as e:
320 # except (EnvironmentError) as e:
321 # print str(e)
321 # print str(e)
322 # raise e
322 # raise e
323 #
323 #
324 # return answer
324 # return answer
325
325
326
326
327 '''
327 '''
328 def __init__(self, cmd, inputstream=None, buffer_size=65536,
328 def __init__(self, cmd, inputstream=None, buffer_size=65536,
329 chunk_size=4096, starting_values=[], **kwargs):
329 chunk_size=4096, starting_values=[], **kwargs):
330 '''
330 '''
331 Initializes SubprocessIOChunker
331 Initializes SubprocessIOChunker
332
332
333 :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
333 :param cmd: A Subprocess.Popen style "cmd". Can be string or array of strings
334 :param inputstream: (Default: None) A file-like, string, or file pointer.
334 :param inputstream: (Default: None) A file-like, string, or file pointer.
335 :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
335 :param buffer_size: (Default: 65536) A size of total buffer per stream in bytes.
336 :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
336 :param chunk_size: (Default: 4096) A max size of a chunk. Actual chunk may be smaller.
337 :param starting_values: (Default: []) An array of strings to put in front of output que.
337 :param starting_values: (Default: []) An array of strings to put in front of output que.
338 '''
338 '''
339
339
340 if inputstream:
340 if inputstream:
341 input_streamer = StreamFeeder(inputstream)
341 input_streamer = StreamFeeder(inputstream)
342 input_streamer.start()
342 input_streamer.start()
343 inputstream = input_streamer.output
343 inputstream = input_streamer.output
344
344
345 _shell = kwargs.get('shell', True)
345 if isinstance(cmd, (list, tuple)):
346 if isinstance(cmd, (list, tuple)):
346 cmd = ' '.join(cmd)
347 cmd = ' '.join(cmd)
347
348
348 _shell = kwargs.get('shell') or True
349 kwargs['shell'] = _shell
349 kwargs['shell'] = _shell
350 _p = subprocess.Popen(cmd,
350 _p = subprocess.Popen(cmd,
351 bufsize=-1,
351 bufsize=-1,
352 stdin=inputstream,
352 stdin=inputstream,
353 stdout=subprocess.PIPE,
353 stdout=subprocess.PIPE,
354 stderr=subprocess.PIPE,
354 stderr=subprocess.PIPE,
355 **kwargs
355 **kwargs
356 )
356 )
357
357
358 bg_out = BufferedGenerator(_p.stdout, buffer_size, chunk_size, starting_values)
358 bg_out = BufferedGenerator(_p.stdout, buffer_size, chunk_size, starting_values)
359 bg_err = BufferedGenerator(_p.stderr, 16000, 1, bottomless=True)
359 bg_err = BufferedGenerator(_p.stderr, 16000, 1, bottomless=True)
360
360
361 while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length:
361 while not bg_out.done_reading and not bg_out.reading_paused and not bg_err.length:
362 # doing this until we reach either end of file, or end of buffer.
362 # doing this until we reach either end of file, or end of buffer.
363 bg_out.data_added_event.wait(1)
363 bg_out.data_added_event.wait(1)
364 bg_out.data_added_event.clear()
364 bg_out.data_added_event.clear()
365
365
366 # at this point it's still ambiguous if we are done reading or just full buffer.
366 # at this point it's still ambiguous if we are done reading or just full buffer.
367 # Either way, if error (returned by ended process, or implied based on
367 # Either way, if error (returned by ended process, or implied based on
368 # presence of stuff in stderr output) we error out.
368 # presence of stuff in stderr output) we error out.
369 # Else, we are happy.
369 # Else, we are happy.
370 _returncode = _p.poll()
370 _returncode = _p.poll()
371 if _returncode or (_returncode == None and bg_err.length):
371 if _returncode or (_returncode == None and bg_err.length):
372 try:
372 try:
373 _p.terminate()
373 _p.terminate()
374 except:
374 except:
375 pass
375 pass
376 bg_out.stop()
376 bg_out.stop()
377 bg_err.stop()
377 bg_err.stop()
378 err = '%s' % ''.join(bg_err)
378 err = '%s' % ''.join(bg_err)
379 if err:
379 if err:
380 raise EnvironmentError("Subprocess exited due to an error:\n" + err)
380 raise EnvironmentError("Subprocess exited due to an error:\n" + err)
381 raise EnvironmentError("Subprocess exited with non 0 ret code:%s" % _returncode)
381 raise EnvironmentError("Subprocess exited with non 0 ret code:%s" % _returncode)
382
382
383 self.process = _p
383 self.process = _p
384 self.output = bg_out
384 self.output = bg_out
385 self.error = bg_err
385 self.error = bg_err
386
386
387 def __iter__(self):
387 def __iter__(self):
388 return self
388 return self
389
389
390 def next(self):
390 def next(self):
391 if self.process.poll():
391 if self.process.poll():
392 err = '%s' % ''.join(self.error)
392 err = '%s' % ''.join(self.error)
393 raise EnvironmentError("Subprocess exited due to an error:\n" + err)
393 raise EnvironmentError("Subprocess exited due to an error:\n" + err)
394 return self.output.next()
394 return self.output.next()
395
395
396 def throw(self, type, value=None, traceback=None):
396 def throw(self, type, value=None, traceback=None):
397 if self.output.length or not self.output.done_reading:
397 if self.output.length or not self.output.done_reading:
398 raise type(value)
398 raise type(value)
399
399
400 def close(self):
400 def close(self):
401 try:
401 try:
402 self.process.terminate()
402 self.process.terminate()
403 except:
403 except:
404 pass
404 pass
405 try:
405 try:
406 self.output.close()
406 self.output.close()
407 except:
407 except:
408 pass
408 pass
409 try:
409 try:
410 self.error.close()
410 self.error.close()
411 except:
411 except:
412 pass
412 pass
413
413
414 def __del__(self):
414 def __del__(self):
415 self.close()
415 self.close()
General Comments 0
You need to be logged in to leave comments. Login now