##// END OF EJS Templates
performance section docs update
performance section docs update

File last commit:

r3339:b76a595b beta
r3390:4cd84f4f beta
Show More
__init__.py
280 lines | 9.7 KiB | text/x-python | PythonLexer
code docs, updates
r903 # -*- coding: utf-8 -*-
"""
rhodecode.lib.indexers.__init__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Whoosh indexing module for RhodeCode
source code cleanup: remove trailing white space, normalize file endings
r1203
code docs, updates
r903 :created_on: Aug 17, 2010
:author: marcink
2012 copyrights
r1824 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
code docs, updates
r903 :license: GPLv3, see COPYING for more details.
"""
fixed license issue #149
r1206 # This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
source code cleanup: remove trailing white space, normalize file endings
r1203 #
code docs, updates
r903 # This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
source code cleanup: remove trailing white space, normalize file endings
r1203 #
code docs, updates
r903 # You should have received a copy of the GNU General Public License
fixed license issue #149
r1206 # along with this program. If not, see <http://www.gnu.org/licenses/>.
Hacking for git support,and new faster repo scan
r631 import os
import sys
complete rewrite of paster commands,...
r785 import traceback
Whoosh logging is now controlled by the .ini files logging setup
r2102 import logging
renamed project to rhodecode
r547 from os.path import dirname as dn, join as jn
Hacking for git support,and new faster repo scan
r631
#to get the rhodecode import
sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
fixes #90 + docs update
r894 from string import strip
renamed project to rhodecode
r547 from shutil import rmtree
complete rewrite of paster commands,...
r785
renamed project to rhodecode
r547 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
Implemented filtering of admin journal based on Whoosh Query language...
r3062 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME
renamed project to rhodecode
r547 from whoosh.index import create_in, open_dir
from whoosh.formats import Characters
bumbed whoosh to 2.3.X series...
r1995 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
renamed project to rhodecode
r547
UI fixes for searching
r2389 from webhelpers.html.builder import escape, literal
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
r1302 from sqlalchemy import engine_from_config
from rhodecode.model import init_model
from rhodecode.model.scm import ScmModel
Jared Bunting
Allowing indexing job to resolve repos path on its own if not given.
r1407 from rhodecode.model.repo import RepoModel
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
r1302 from rhodecode.config.environment import load_environment
utils/conf...
r2109 from rhodecode.lib.utils2 import LazyProperty
from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\
load_rcextensions
renamed project to rhodecode
r547
Indra Talip
create an index for commit messages and the ability to search them and see results
log = logging.getLogger(__name__)

# Custom analyzer: split the text on runs of word characters (``\w+``) and
# lowercase every resulting token.
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()

# Index schema definition for file documents.
SCHEMA = Schema(
    # unique key of a document
    fileid=ID(unique=True),
    owner=TEXT(),
    repository=TEXT(stored=True),
    path=TEXT(stored=True),
    # content is run through ANALYZER and stored in the index
    content=FieldType(
        format=Characters(),
        analyzer=ANALYZER,
        scorable=True,
        stored=True
    ),
    modtime=STORED(),
    extension=TEXT(stored=True)
)
renamed project to rhodecode
r547
# Name under which the file index is kept.
IDX_NAME = 'HG_INDEX'

# Highlighting setup: matches are wrapped in ``<span>`` tags and separate
# fragments are joined with a "..." break marker.
FORMATTER = HtmlFormatter(
    'span',
    between='\n<span class="break">...</span>\n'
)
FRAGMENTER = ContextFragmenter(200)
Hacking for git support,and new faster repo scan
r631
Indra Talip
create an index for commit messages and the ability to search them and see results
# Index schema definition for changeset (commit) documents.
CHGSETS_SCHEMA = Schema(
    raw_id=ID(unique=True, stored=True),
    date=NUMERIC(stored=True),
    last=BOOLEAN(),
    owner=TEXT(),
    repository=ID(unique=True, stored=True),
    author=TEXT(stored=True),
    # message is run through ANALYZER and stored in the index
    message=FieldType(
        format=Characters(),
        analyzer=ANALYZER,
        scorable=True,
        stored=True
    ),
    parents=TEXT(),
    added=TEXT(),
    removed=TEXT(),
    changed=TEXT(),
)

# Name under which the changeset index is kept.
CHGSET_IDX_NAME = 'CHGSET_INDEX'
Hacking for git support,and new faster repo scan
r631
Implemented filtering of admin journal based on Whoosh Query language...
# Schema used only to generate queries in the journal.
JOURNAL_SCHEMA = Schema(
    username=TEXT(),
    date=DATETIME(),
    action=TEXT(),
    repository=TEXT(),
    ip=TEXT(),
)
fixes issue #524...
r2718
complete rewrite of paster commands,...
class MakeIndex(BasePasterCommand):
    """
    Paster command that creates or updates the full text search index.

    Takes exactly one positional argument (``CONFIG_FILE``); the options
    it understands are registered in :meth:`update_parser`.
    """

    max_args = 1
    min_args = 1

    usage = "CONFIG_FILE"
    summary = "Creates or update full text search index"
    group_name = "RhodeCode"
    takes_config_file = -1
    parser = Command.standard_parser(verbose=True)

    @staticmethod
    def _parse_repo_names(value):
        """
        Turn a comma separated option value into a list of stripped names.

        :param value: raw option string, may be ``None`` or empty
        :returns: list of names, or ``None`` when no value was given
        """
        if not value:
            return None
        return [name.strip() for name in value.split(',')]

    def command(self):
        """
        Set up logging, cache and database from the ini file, then run the
        Whoosh indexing daemon under a ``make_index.lock`` lock.

        Exits the process with status 1 when the lock is already held.
        """
        # ``import logging`` alone does not guarantee that the ``config``
        # submodule is loaded, so import it explicitly before using it
        import logging.config
        logging.config.fileConfig(self.path_to_ini_file)

        from pylons import config
        add_cache(config)
        engine = engine_from_config(config, 'sqlalchemy.db1.')
        init_model(engine)

        index_location = config['index_dir']
        # fall back to the repositories path known to RepoModel when the
        # location is not given on the command line
        repo_location = (self.options.repo_location
                         or RepoModel().repos_path)
        repo_list = self._parse_repo_names(self.options.repo_list)
        repo_update_list = self._parse_repo_names(
            self.options.repo_update_list
        )

        load_rcextensions(config['here'])

        #======================================================================
        # WHOOSH DAEMON
        #======================================================================
        from rhodecode.lib.pidlock import LockHeld, DaemonLock
        from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
        try:
            # the lock file lives two directory levels above the index dir
            lock = DaemonLock(
                file_=jn(dn(dn(index_location)), 'make_index.lock')
            )
            try:
                daemon = WhooshIndexingDaemon(
                    index_location=index_location,
                    repo_location=repo_location,
                    repo_list=repo_list,
                    repo_update_list=repo_update_list
                )
                daemon.run(full_index=self.options.full_index)
            finally:
                # release the lock even when indexing fails, so a crashed
                # run does not leave the lock behind
                lock.release()
        except LockHeld:
            sys.exit(1)

    def update_parser(self):
        """
        Register the command line options of this command on the parser.
        """
        add_option = self.parser.add_option

        add_option(
            '--repo-location',
            action='store',
            dest='repo_location',
            help="Specifies repositories location to index OPTIONAL",
        )
        add_option(
            '--index-only',
            action='store',
            dest='repo_list',
            help="Specifies a comma separated list of repositores "
                 "to build index on. If not given all repositories "
                 "are scanned for indexing. OPTIONAL",
        )
        add_option(
            '--update-only',
            action='store',
            dest='repo_update_list',
            help="Specifies a comma separated list of repositores "
                 "to re-build index on. OPTIONAL",
        )
        add_option(
            '-f',
            action='store_true',
            dest='full_index',
            help="Specifies that index should be made full i.e"
                 " destroy old and build from scratch",
            default=False
        )
Hacking for git support,and new faster repo scan
r631
implements #330 api method for listing nodes at particular revision...
r1810
fixes issue #454 Search results under Windows include preceeding backslash
class WhooshResultWrapper(object):
    """
    Wrapper around the results of a Whoosh search.

    Collects the matched document numbers (with their highlight chunks)
    from ``matcher`` and builds the full result dictionaries on demand
    when the wrapper is iterated, indexed or sliced.

    :param search_type: kind of search; ``'content'``, ``'path'`` and
        ``'message'`` get extra keys added in :meth:`get_full_content`
    :param searcher: object providing ``stored_fields(docnum)``
    :param matcher: matcher walked to collect the matching documents
    :param highlight_items: terms handed to the highlighter
    :param repo_location: base path of the repositories, stripped from
        stored file paths
    """

    def __init__(self, search_type, searcher, matcher, highlight_items,
                 repo_location):
        self.search_type = search_type
        self.searcher = searcher
        self.matcher = matcher
        self.highlight_items = highlight_items
        # number of characters kept on each side of a match
        self.fragment_size = 200
        self.repo_location = repo_location

    @LazyProperty
    def doc_ids(self):
        """
        List of ``[docnum, chunks]`` pairs for every document the matcher
        yields; walking the matcher consumes it.
        """
        docs_id = []
        while self.matcher.is_active():
            docnum = self.matcher.id()
            # chunks have to be read before the matcher is advanced
            chunks = list(self.get_chunks())
            docs_id.append([docnum, chunks])
            self.matcher.next()
        return docs_id

    def __str__(self):
        return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))

    def __repr__(self):
        return self.__str__()

    def __len__(self):
        return len(self.doc_ids)

    def __iter__(self):
        """
        Allows Iteration over results,and lazy generate content

        *Requires* implementation of ``__getitem__`` method.
        """
        for docid in self.doc_ids:
            yield self.get_full_content(docid)

    def __getitem__(self, key):
        """
        Indexing and slicing of resultWrapper

        :param key: a slice or an integer index
        :returns: list of result dicts for a slice, a single result dict
            for an integer index
        """
        if isinstance(key, slice):
            return [self.get_full_content(docid)
                    for docid in self.doc_ids[key]]
        # plain integer index; the list lookup raises IndexError itself
        return self.get_full_content(self.doc_ids[key])

    def get_full_content(self, docid):
        """
        Build the result dictionary for one matched document.

        :param docid: ``[docnum, chunks]`` pair as stored in ``doc_ids``
        :returns: stored fields of the document, extended with
            ``f_path`` (content and path searches), ``content_short`` and
            ``content_short_hl`` (content searches) or ``message_hl``
            (message searches)
        """
        res = self.searcher.stored_fields(docid[0])
        log.debug('result: %s', res)

        if self.search_type in ('content', 'path'):
            # path of the file relative to its repository, without a
            # leading path separator
            full_repo_path = jn(self.repo_location, res['repository'])
            f_path = res['path'].split(full_repo_path)[-1]
            f_path = f_path.lstrip(os.sep)

            if self.search_type == 'content':
                content_short = self.get_short_content(res, docid[1])
                res.update({
                    'content_short': content_short,
                    'content_short_hl': self.highlight(content_short),
                    'f_path': f_path
                })
            else:
                res.update({'f_path': f_path})
        elif self.search_type == 'message':
            res.update({'message_hl': self.highlight(res['message'])})

        log.debug('result: %s', res)

        return res

    def get_short_content(self, res, chunks):
        """
        Join the pieces of ``res['content']`` selected by ``chunks``.

        :param res: result dict holding the full ``content``
        :param chunks: iterable of ``(start, end)`` character offsets
        """
        content = res['content']
        return ''.join([content[chunk[0]:chunk[1]] for chunk in chunks])

    def get_chunks(self):
        """
        Smart function that implements chunking the content
        but not overlap chunks so it doesn't highlight the same
        close occurrences twice.

        Yields ``(start, end)`` character offsets for the document the
        matcher currently points at; yields nothing when the matcher does
        not support positions.
        """
        if not self.matcher.supports('positions'):
            return

        # highest end offset handed out so far; it never moves backwards
        # so a later chunk cannot re-cover text already included
        covered_until = 0
        for span in self.matcher.spans():
            start = span.startchar or 0
            end = span.endchar or 0
            start_offseted = max(0, start - self.fragment_size)
            end_offseted = end + self.fragment_size

            if start_offseted < covered_until:
                start_offseted = covered_until
            covered_until = max(covered_until, end_offseted)
            yield (start_offseted, end_offseted,)

    def highlight(self, content, top=5):
        """
        Return ``content`` with the search terms highlighted.

        :param content: text to highlight
        :param top: passed to the highlighter as ``top``
        :returns: highlighted text, or ``''`` for search types other than
            ``'content'`` and ``'message'``
        """
        if self.search_type not in ['content', 'message']:
            return ''
        return highlight(
            text=content,
            terms=self.highlight_items,
            analyzer=ANALYZER,
            fragmenter=FRAGMENTER,
            formatter=FORMATTER,
            top=top
        )