u/ewong/rhodecode-enterprise-ce-fork Commit - r1411:16beb154

search: goto commit search will now use a safe search option and never...

marcink -

r1411:16beb154 default

parent child

rhodecode/controllers/home.py

0 +4 -2

              # -*- coding: utf-8 -*-
              # Copyright (C) 2010-2017 RhodeCode GmbH
              #
              # This program is free software: you can redistribute it and/or modify
              # it under the terms of the GNU Affero General Public License, version 3
              # (only), as published by the Free Software Foundation.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU Affero General Public License
              # along with this program.  If not, see <http://www.gnu.org/licenses/>.
              #
              # This program is dual-licensed. If you wish to learn more about the
              # RhodeCode Enterprise Edition, including its added features, Support services,
              # and proprietary license terms, please see https://rhodecode.com/licenses/
              """
              Home controller for RhodeCode Enterprise
              """
              import logging
              import time
              import re
              from pylons import tmpl_context as c, request, url, config
              from pylons.i18n.translation import _
              from sqlalchemy.sql import func
              from rhodecode.lib.auth import (
                  LoginRequired, HasPermissionAllDecorator, AuthUser,
                  HasRepoGroupPermissionAnyDecorator, XHRRequired)
              from rhodecode.lib.base import BaseController, render
              from rhodecode.lib.index import searcher_from_config
              from rhodecode.lib.ext_json import json
              from rhodecode.lib.utils import jsonify
              from rhodecode.lib.utils2 import safe_unicode, str2bool
              from rhodecode.model.db import Repository, RepoGroup
              from rhodecode.model.repo import RepoModel
              from rhodecode.model.repo_group import RepoGroupModel
              from rhodecode.model.scm import RepoList, RepoGroupList
              log = logging.getLogger(__name__)
              class HomeController(BaseController):
                  def __before__(self):
                      """
                      Delegate to the parent class ``__before__``; this override adds
                      no behavior of its own.
                      """
                      super(HomeController, self).__before__()
                  def ping(self):
                      """
                      Return a ``pong[<instance id>] => <ip address>`` string.

                      No login decorator is applied to this action, so it can be used
                      to check that the platform responds.
                      """
                      # falls back to an empty string when the attribute is not set on `c`
                      current_instance = getattr(c, 'rhodecode_instanceid', '')
                      reply = 'pong[%s] => %s' % (current_instance, self.ip_addr)
                      return reply
                  @LoginRequired()
                  @HasPermissionAllDecorator('hg.admin')
                  def error_test(self):
                      """
                      Always raise a locally defined exception.

                      Used to test exception handling and emails on errors.
                      """
                      class TestException(Exception):
                          pass

                      instance_name = c.rhodecode_name
                      generated_at = time.time()
                      raise TestException(
                          'RhodeCode Enterprise %s test exception. Generation time: %s'
                          % (instance_name, generated_at))
                  def _get_groups_and_repos(self, repo_group_id=None):
                      """
                      Collect the repository and repository group data for the grids.

                      :param repo_group_id: passed as ``group_id`` to both the repository
                          group lookup and the repository lookup.
                      :return: ``(repo_data, repo_group_data)`` tuple.
                      """
                      group_perms = ['group.read', 'group.write', 'group.admin']
                      repo_perms = [
                          'repository.read', 'repository.write', 'repository.admin']

                      # repository groups, wrapped with the group permission set
                      groups_acl = RepoGroupList(
                          RepoGroup.get_all_repo_groups(group_id=repo_group_id),
                          perm_set=group_perms)
                      repo_group_data = RepoGroupModel().get_repo_groups_as_dict(
                          repo_group_list=groups_acl, admin=False)

                      # repositories, wrapped with the repository permission set
                      repos_acl = RepoList(
                          Repository.get_all_repos(group_id=repo_group_id),
                          perm_set=repo_perms)
                      repo_data = RepoModel().get_repos_as_dict(
                          repo_list=repos_acl, admin=False)

                      return repo_data, repo_group_data
                  @LoginRequired()
                  def index(self):
                      """
                      Render ``/index.mako`` with the repository and repository group
                      data collected without a group filter.
                      """
                      c.repo_group = None
                      repos, groups = self._get_groups_and_repos()

                      # the grids are rendered from these JSON strings
                      c.repos_data = json.dumps(repos)
                      c.repo_groups_data = json.dumps(groups)

                      return render('/index.mako')
                  @LoginRequired()
                  @HasRepoGroupPermissionAnyDecorator('group.read', 'group.write',
                                                      'group.admin')
                  def index_repo_group(self, group_name):
                      """
                      GET /repo_group_name: Show a specific item

                      Looks up the group by ``group_name``, stores it on ``c.repo_group``
                      and renders ``index_repo_group.mako`` with the data collected for
                      that group's id.
                      """
                      group = RepoGroupModel()._get_repo_group(group_name)
                      c.repo_group = group
                      repos, groups = self._get_groups_and_repos(group.group_id)

                      # the grids are rendered from these JSON strings
                      c.repos_data = json.dumps(repos)
                      c.repo_groups_data = json.dumps(groups)

                      return render('index_repo_group.mako')
                  def _get_repo_list(self, name_contains=None, repo_type=None, limit=20):
                      """
                      Build the list of repository entries for the switcher widgets.

                      :param name_contains: optional case-insensitive substring filter on
                          the repository name.
                      :param repo_type: optional exact filter on the repository type.
                      :param limit: maximum number of rows; only applied together with
                          the ``name_contains`` filter.
                      :return: list of dicts with ``id``, ``text``, ``type``, ``obj`` and
                          ``url`` keys.
                      """
                      # ordered by name length first, then by name
                      query = (
                          Repository.query()
                          .order_by(func.length(Repository.repo_name))
                          .order_by(Repository.repo_name))

                      if repo_type:
                          query = query.filter(Repository.repo_type == repo_type)

                      if name_contains:
                          pattern = u'%{}%'.format(safe_unicode(name_contains))
                          # NOTE(review): the limit is applied on this branch only, so an
                          # empty filter returns every row - confirm this is intended
                          query = query.filter(
                              Repository.repo_name.ilike(pattern)).limit(limit)

                      entries = []
                      for item in self.scm_model.get_repos(query.all()):
                          entries.append({
                              'id': item['name'],
                              'text': item['name'],
                              'type': 'repo',
                              'obj': item['dbrepo'],
                              'url': url('summary_home', repo_name=item['name'])
                          })
                      return entries
                  def _get_repo_group_list(self, name_contains=None, limit=20):
                      """
                      Build the list of repository group entries for the switcher.

                      :param name_contains: optional case-insensitive substring filter on
                          the group name.
                      :param limit: maximum number of rows; only applied together with
                          the ``name_contains`` filter.
                      :return: list of dicts with ``id``, ``text``, ``type``, ``obj`` and
                          ``url`` keys; ``obj`` is always an empty dict.
                      """
                      # ordered by name length first, then by name
                      query = (
                          RepoGroup.query()
                          .order_by(func.length(RepoGroup.group_name))
                          .order_by(RepoGroup.group_name))

                      if name_contains:
                          pattern = u'%{}%'.format(safe_unicode(name_contains))
                          # NOTE(review): the limit is applied on this branch only, so an
                          # empty filter returns every row - confirm this is intended
                          query = query.filter(
                              RepoGroup.group_name.ilike(pattern)).limit(limit)

                      entries = []
                      for group in self.scm_model.get_repo_groups(query.all()):
                          entries.append({
                              'id': group.group_name,
                              'text': group.group_name,
                              'type': 'group',
                              'obj': {},
                              'url': url('repo_group_home', group_name=group.group_name)
                          })
                      return entries
                  def _get_hash_commit_list(self, hash_starts_with=None, limit=20):
                      """
                      Look up commits whose id starts with the hash found in the input.

                      :param hash_starts_with: user supplied text; must be at least 3
                          characters long and contain exactly one run of lowercase hex
                          characters, otherwise an empty list is returned.
                      :param limit: not referenced anywhere in this body.
                      :return: list of dicts with ``id``, ``text``, ``type``, ``obj`` and
                          ``url`` keys, one per search result.
                      """
                      if not hash_starts_with or len(hash_starts_with) < 3:
                          return []
                      # collects every run of 2-40 lowercase hex characters in the input
                      commit_hashes = re.compile('([0-9a-f]{2,40})').findall(hash_starts_with)
                      # anything other than exactly one hex run is treated as "no match"
                      if len(commit_hashes) != 1:
                          return []
                      commit_hash_prefix = commit_hashes[0]
                      # the search is run on behalf of the currently logged-in user
                      auth_user = AuthUser(
                          user_id=c.rhodecode_user.user_id, ip_addr=self.ip_addr)
                      searcher = searcher_from_config(config)
                      # prefix query against the `commit` document type
                      result = searcher.search(
-                         'commit_id:%s*' % commit_hash_prefix, 'commit', auth_user)
+                         'commit_id:%s*' % commit_hash_prefix, 'commit', auth_user,
+                         raise_on_exc=False)
                      return [
                          {
                              'id': entry['commit_id'],
                              'text': entry['commit_id'],
                              'type': 'commit',
                              'obj': {'repo': entry['repository']},
                              'url': url('changeset_home',
-                                 repo_name=entry['repository'], revision=entry['commit_id'])
+                                        repo_name=entry['repository'],
+                                        revision=entry['commit_id'])
                          }
                          for entry in result['results']]
                  @LoginRequired()
                  @XHRRequired()
                  @jsonify
                  def goto_switcher_data(self):
                      """
                      Build the data for the "goto" switcher from the ``query`` GET
                      parameter.

                      The result holds, in this order, a section for matching groups, a
                      section for matching repositories and one section per repository
                      that has matching commits. Empty sections are left out.
                      """
                      query = request.GET.get('query')
                      log.debug('generating goto switcher list, query %s', query)
                      sections = []

                      matching_groups = self._get_repo_group_list(query)
                      if matching_groups:
                          sections.append(
                              {'text': _('Groups'), 'children': matching_groups})

                      matching_repos = self._get_repo_list(query)
                      if matching_repos:
                          sections.append(
                              {'text': _('Repositories'), 'children': matching_repos})

                      # bucket the commit entries by the repository they belong to
                      commits_by_repo = {}
                      for commit in self._get_hash_commit_list(query):
                          commits_by_repo.setdefault(
                              commit['obj']['repo'], []).append(commit)

                      for repo_name, repo_commits in commits_by_repo.items():
                          sections.append({
                              'text': _('Commits in %(repo)s') % {'repo': repo_name},
                              'children': repo_commits
                          })

                      return {'more': False, 'results': sections}
                  @LoginRequired()
                  @XHRRequired()
                  @jsonify
                  def repo_list_data(self):
                      """
                      Build the repository list data from the ``query`` and
                      ``repo_type`` GET parameters.

                      The ``Repositories`` section is only present when at least one
                      repository matched.
                      """
                      params = request.GET
                      query = params.get('query')
                      repo_type = params.get('repo_type')
                      log.debug('generating repo list, query:%s', query)

                      sections = []
                      matching_repos = self._get_repo_list(query, repo_type=repo_type)
                      if matching_repos:
                          sections.append(
                              {'text': _('Repositories'), 'children': matching_repos})

                      return {'more': False, 'results': sections}
                  @LoginRequired()
                  @XHRRequired()
                  @jsonify
                  def user_autocomplete_data(self):
                      """
                      Build user suggestions from the ``query`` GET parameter.

                      ``active`` falls back to ``True`` when missing or empty. User
                      groups are appended after the users when the ``user_groups`` GET
                      parameter is set.
                      """
                      query = request.GET.get('query')
                      active = str2bool(request.GET.get('active') or True)
                      repo_model = RepoModel()

                      suggestions = repo_model.get_users(
                          name_contains=query, only_active=active)

                      if request.GET.get('user_groups'):
                          # extend with user groups
                          matching_groups = repo_model.get_user_groups(
                              name_contains=query, only_active=active)
                          suggestions = suggestions + matching_groups

                      return {'suggestions': suggestions}
                  @LoginRequired()
                  @XHRRequired()
                  @jsonify
                  def user_group_autocomplete_data(self):
                      """
                      Build user group suggestions from the ``query`` GET parameter.

                      ``active`` falls back to ``True`` when missing or empty.
                      """
                      query = request.GET.get('query')
                      active = str2bool(request.GET.get('active') or True)
                      matching_groups = RepoModel().get_user_groups(
                          name_contains=query, only_active=active)
                      return {'suggestions': matching_groups}

rhodecode/lib/index/__init__.py

0 +3 -1

              # -*- coding: utf-8 -*-
              # Copyright (C) 2012-2017 RhodeCode GmbH
              #
              # This program is free software: you can redistribute it and/or modify
              # it under the terms of the GNU Affero General Public License, version 3
              # (only), as published by the Free Software Foundation.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU Affero General Public License
              # along with this program.  If not, see <http://www.gnu.org/licenses/>.
              #
              # This program is dual-licensed. If you wish to learn more about the
              # RhodeCode Enterprise Edition, including its added features, Support services,
              # and proprietary license terms, please see https://rhodecode.com/licenses/
              """
              Index schema for RhodeCode
              """
              import importlib
              import logging
              log = logging.getLogger(__name__)
              # leave defaults for backward compat
              # module path imported by `searcher_from_config` when the config has no
              # `module` key
              default_searcher = 'rhodecode.lib.index.whoosh'
              # value stored under `location` by `searcher_from_config` when the config
              # has no such key
              default_location = '%(here)s/data/index'
              class BaseSearch(object):
                  """
                  Base class for search backends.

                  ``__init__`` and ``cleanup`` do nothing here; ``search`` always
                  raises and has to be overridden.
                  """
                  def __init__(self):
                      pass
                  def cleanup(self):
                      pass
-                 def search(self, query, document_type, search_user, repo_name=None):
+                 def search(self, query, document_type, search_user, repo_name=None,
+                            raise_on_exc=True):
                      # NOTE: raises a plain `Exception`, not `NotImplementedError`
                      raise Exception('NotImplemented')
              def searcher_from_config(config, prefix='search.'):
                  """
                  Create a searcher instance from the prefixed keys of `config`.

                  Keys starting with `prefix` are collected with the prefix stripped.
                  A missing ``location`` key is filled with ``default_location``. The
                  module named by the ``module`` key (``default_searcher`` when
                  absent) is imported and its ``Search`` class is instantiated with
                  the collected settings.

                  :param config: mapping holding the application settings.
                  :param prefix: prefix selecting the search related keys.
                  :return: instance of the imported module's ``Search`` class.
                  """
                  offset = len(prefix)
                  settings = {
                      key[offset:]: config[key]
                      for key in config.keys() if key.startswith(prefix)}
                  settings.setdefault('location', default_location)

                  module_name = settings.get('module', default_searcher)
                  backend = importlib.import_module(module_name)
                  return backend.Search(config=settings)

rhodecode/lib/index/whoosh.py

0 +4 -3

              # -*- coding: utf-8 -*-
              # Copyright (C) 2012-2017 RhodeCode GmbH
              #
              # This program is free software: you can redistribute it and/or modify
              # it under the terms of the GNU Affero General Public License, version 3
              # (only), as published by the Free Software Foundation.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU Affero General Public License
              # along with this program.  If not, see <http://www.gnu.org/licenses/>.
              #
              # This program is dual-licensed. If you wish to learn more about the
              # RhodeCode Enterprise Edition, including its added features, Support services,
              # and proprietary license terms, please see https://rhodecode.com/licenses/
              """
              Index schema for RhodeCode
              """
              from __future__ import absolute_import
              import logging
              import os
              import re
              from pylons.i18n.translation import _
              from whoosh import query as query_lib, sorting
              from whoosh.highlight import HtmlFormatter, ContextFragmenter
              from whoosh.index import create_in, open_dir, exists_in, EmptyIndexError
              from whoosh.qparser import QueryParser, QueryParserError
              import rhodecode.lib.helpers as h
              from rhodecode.lib.index import BaseSearch
              log = logging.getLogger(__name__)
              try:
                  # we first try to import from rhodecode tools, fallback to copies if
                  # we're unable to
                  from rhodecode_tools.lib.fts_index.whoosh_schema import (
                      ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
                      COMMIT_SCHEMA)
              except ImportError:
                  log.warning('rhodecode_tools schema not available, doing a fallback '
                              'import from `rhodecode.lib.index.whoosh_fallback_schema`')
                  # same five names, taken from the copy shipped with this package
                  from rhodecode.lib.index.whoosh_fallback_schema import (
                      ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
                      COMMIT_SCHEMA)
              # module-level whoosh highlight helpers; neither is referenced by the code
              # visible in this module
              FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
              FRAGMENTER = ContextFragmenter(200)
              # NOTE(review): `log` is already bound by the identical call right before
              # the `try` block above, so this rebinding looks redundant
              log = logging.getLogger(__name__)
              class Search(BaseSearch):
                  name = 'whoosh'
                  def __init__(self, config):
                      """
                      Open the file and commit indexes, creating whatever is missing.

                      :param config: mapping that must provide a ``location`` key with
                          the index directory; the directory is created when it does
                          not exist.
                      """
+                     super(Search, self).__init__()
                      self.config = config
                      if not os.path.isdir(self.config['location']):
                          os.makedirs(self.config['location'])
                      # file index: open it when it already exists, create it otherwise
                      opener = create_in
                      if exists_in(self.config['location'], indexname=FILE_INDEX_NAME):
                          opener = open_dir
                      file_index = opener(self.config['location'], schema=FILE_SCHEMA,
                                          indexname=FILE_INDEX_NAME)
                      # commit index: same open-or-create choice
                      opener = create_in
                      if exists_in(self.config['location'], indexname=COMMIT_INDEX_NAME):
                          opener = open_dir
                      changeset_index = opener(self.config['location'], schema=COMMIT_SCHEMA,
                                               indexname=COMMIT_INDEX_NAME)
                      self.commit_schema = COMMIT_SCHEMA
                      self.commit_index = changeset_index
                      self.file_schema = FILE_SCHEMA
                      self.file_index = file_index
                      # set by `_init_searcher`, closed by `cleanup`
                      self.searcher = None
                  def cleanup(self):
                      """
                      Close the current searcher, if one has been set.
                      """
                      if not self.searcher:
                          return
                      self.searcher.close()
                  def _extend_query(self, query):
                      hashes = re.compile('([0-9a-f]{5,40})').findall(query)
                      if hashes:
                          hashes_or_query = ' OR '.join('commit_id:%s*' % h for h in hashes)
                          query = u'(%s) OR %s' % (query, hashes_or_query)
                      return query
-                 def search(self, query, document_type, search_user, repo_name=None,
-                     requested_page=1, page_limit=10, sort=None):
+                 def search(self, query, document_type, search_user,
+                            repo_name=None, requested_page=1, page_limit=10, sort=None,
+                            raise_on_exc=True):
                      """
                      Run `query` against the index selected by `document_type`.

                      :param query: query string; extended by ``_extend_query`` before
                          it is parsed.
                      :param document_type: passed to ``_prepare_for_search`` to pick
                          the search field, index name and schema.
                      :param search_user: user object passed to ``_get_repo_filter``.
                      :param repo_name: optional repository name passed to
                          ``_get_repo_filter``.
                      :param sort: ``'oldfirst'`` or ``'newfirst'`` sort by ``date``;
                          only honoured when the search field is ``message``.
                      :param requested_page: not referenced in this body.
                      :param page_limit: not referenced in this body.
                      :param raise_on_exc: not referenced in this body.
                      :return: dict with ``results``, ``count``, ``error`` and
                          ``runtime`` keys; failures caught below are reported through
                          ``error`` instead of being raised.
                      """
                      # NOTE: `original_query` is assigned but not used below
                      original_query = query
                      query = self._extend_query(query)
                      log.debug(u'QUERY: %s on %s', query, document_type)
                      result = {
                          'results': [],
                          'count': 0,
                          'error': None,
                          'runtime': 0
                      }
                      search_type, index_name, schema_defn = self._prepare_for_search(
                          document_type)
                      # called outside of the `try` block, so an exception raised here
                      # is not turned into `result['error']`
                      self._init_searcher(index_name)
                      try:
                          qp = QueryParser(search_type, schema=schema_defn)
                          allowed_repos_filter = self._get_repo_filter(
                              search_user, repo_name)
                          try:
                              query = qp.parse(unicode(query))
                              log.debug('query: %s (%s)' % (query, repr(query)))
                              # sorting is only configured for the `message` field
                              reverse, sortedby = False, None
                              if search_type == 'message':
                                  if sort == 'oldfirst':
                                      sortedby = 'date'
                                      reverse = False
                                  elif sort == 'newfirst':
                                      sortedby = 'date'
                                      reverse = True
                              whoosh_results = self.searcher.search(
                                  query, filter=allowed_repos_filter, limit=None,
                                  sortedby=sortedby, reverse=reverse)
                              # fixes for 32k limit that whoosh uses for highlight
                              whoosh_results.fragmenter.charlimit = None
                              res_ln = whoosh_results.scored_length()
                              result['runtime'] = whoosh_results.runtime
                              result['count'] = res_ln
                              result['results'] = WhooshResultWrapper(
                                  search_type, res_ln, whoosh_results)
                          except QueryParserError:
                              result['error'] = _('Invalid search query. Try quoting it.')
                      except (EmptyIndexError, IOError, OSError):
                          msg = _('There is no index to search in. '
                                  'Please run whoosh indexer')
                          log.exception(msg)
                          result['error'] = msg
                      except Exception:
                          # any other failure is logged and reported as a generic error
                          msg = _('An error occurred during this search operation')
                          log.exception(msg)
                          result['error'] = msg
                      return result
                  def statistics(self):
                      """
                      Return a list of ``{'key': ..., 'value': ...}`` rows describing the
                      file index and the commit index.
                      """
                      def row(label, value):
                          return {'key': label, 'value': value}

                      files = self.file_index
                      commits = self.commit_index
                      return [
                          row(_('Index Type'), 'Whoosh'),
                          row(_('File Index'), str(files)),
                          # the file index count is kept as returned, the commit index
                          # count is stringified
                          row(_('Indexed documents'), files.doc_count()),
                          row(_('Last update'),
                              h.time_to_datetime(files.last_modified())),
                          row(_('Commit index'), str(commits)),
                          row(_('Indexed documents'), str(commits.doc_count())),
                          row(_('Last update'),
                              h.time_to_datetime(commits.last_modified())),
                      ]
                  def _get_repo_filter(self, auth_user, repo_name):
                      """
                      Build the repository filter for a search.

                      :param auth_user: object whose ``permissions`` mapping provides the
                          ``repositories`` permissions and, optionally, ``global`` ones.
                      :param repo_name: when set, the filter matches only this repository.
                      :return: ``None`` (no filter) for users with the ``hg.admin``
                          global permission when no `repo_name` is given, otherwise an
                          ``Or`` query over ``repository`` terms.
                      """
                      repo_perms = auth_user.permissions['repositories']
                      searchable = [
                          name for name, perm in repo_perms.items()
                          if perm != 'repository.none']

                      if repo_name:
                          return query_lib.Or([query_lib.Term('repository', repo_name)])

                      if 'hg.admin' in auth_user.permissions.get('global', []):
                          return None

                      terms = [query_lib.Term('repository', name) for name in searchable]
                      if not terms:
                          # in case we're not allowed to search anywhere, it's a trick
                          # to tell whoosh we're filtering, on ALL results
                          terms = [query_lib.Term('repository', '')]
                      return query_lib.Or(terms)
                  def _prepare_for_search(self, cur_type):
                      """
                      Map a document type to its search field, index name and schema.

                      ``commit`` uses the ``message`` field with the commit index and
                      schema. Every other type uses the file index and schema;
                      ``path`` and ``repository`` search their own field and anything
                      else searches ``content``.

                      :return: ``(search_type, index_name, schema_defn)`` tuple.
                      """
                      fields = {
                          'content': 'content',
                          'commit': 'message',
                          'path': 'path',
                          'repository': 'repository'
                      }
                      file_schema, commit_schema = self.file_schema, self.commit_schema

                      search_type = fields.get(cur_type, 'content')
                      if cur_type == 'commit':
                          index_name, schema_defn = COMMIT_INDEX_NAME, commit_schema
                      else:
                          index_name, schema_defn = FILE_INDEX_NAME, file_schema

                      log.debug('IDX: %s' % index_name)
                      log.debug('SCHEMA: %s' % schema_defn)
                      return search_type, index_name, schema_defn
                  def _init_searcher(self, index_name):
                      """
                      Open the index called `index_name` from the configured location,
                      store its searcher on ``self.searcher`` and return it.
                      """
                      location = self.config['location']
                      index = open_dir(location, indexname=index_name)
                      searcher = self.searcher = index.searcher()
                      return searcher
              class WhooshResultWrapper(object):
                  """
                  Lazy wrapper around a collection of search hits.

                  A hit is converted to a plain dict by :meth:`get_full_content` only
                  when it is iterated over, sliced out or indexed.
                  """

                  def __init__(self, search_type, total_hits, results):
                      """
                      :param search_type: searched field; ``'content'``, ``'path'`` and
                          ``'message'`` get extra keys in :meth:`get_full_content`.
                      :param total_hits: number reported by ``len()``.
                      :param results: iterable and sliceable collection of hits.
                      """
                      self.search_type = search_type
                      self.results = results
                      self.total_hits = total_hits

                  def __str__(self):
                      return '<%s at %s>' % (self.__class__.__name__, len(self))

                  def __repr__(self):
                      return self.__str__()

                  def __len__(self):
                      return self.total_hits

                  def __iter__(self):
                      """
                      Iterate over all results, generating the content lazily.
                      """
                      for hit in self.results:
                          yield self.get_full_content(hit)

                  def __getitem__(self, key):
                      """
                      Slicing and indexing of resultWrapper.

                      A slice returns a lazy generator over the converted hits in
                      ``[start:stop]`` (a step is ignored). Any other key is passed to
                      the underlying results and the single converted hit is returned.
                      """
                      if isinstance(key, slice):
                          return self._iter_slice(key.start, key.stop)
                      # a non-slice key used to hand back a generator that failed with
                      # AttributeError on first use; return the one hit instead
                      return self.get_full_content(self.results[key])

                  def _iter_slice(self, start, stop):
                      # kept as a generator so nothing is sliced or converted until
                      # the caller starts iterating
                      for hit in self.results[start:stop]:
                          yield self.get_full_content(hit)

                  def get_full_content(self, hit):
                      """
                      Convert `hit` to a dict and add the keys specific to the search
                      type: ``f_path`` (path with the repository prefix removed) for
                      ``content`` and ``path``, ``content_short_hl`` for ``content``
                      and ``message_hl`` for ``message``.
                      """
                      # TODO: marcink: this feels like an overkill, there's a lot of data
                      # inside hit object, and we don't need all
                      res = dict(hit)

                      f_path = ''  # noqa
                      if self.search_type in ['content', 'path']:
                          f_path = res['path'][len(res['repository']):]
                          f_path = f_path.lstrip(os.sep)

                      if self.search_type == 'content':
                          res.update({'content_short_hl': hit.highlights('content'),
                                      'f_path': f_path})
                      elif self.search_type == 'path':
                          res.update({'f_path': f_path})
                      elif self.search_type == 'message':
                          res.update({'message_hl': hit.highlights('message')})
                      return res

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages