##// END OF EJS Templates
fixed diffs vs Empty changeset
fixed diffs vs Empty changeset

File last commit:

r3960:5293d4bb merge default
r4044:af733fa9 default
Show More
__init__.py
194 lines | 6.2 KiB | text/x-python | PythonLexer
code docs, updates
r903 # -*- coding: utf-8 -*-
"""
rhodecode.lib.indexers.__init__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Whoosh indexing module for RhodeCode
source code cleanup: remove trailing white space, normalize file endings
r1203
code docs, updates
r903 :created_on: Aug 17, 2010
:author: marcink
2012 copyrights
r1824 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
code docs, updates
r903 :license: GPLv3, see COPYING for more details.
"""
fixed license issue #149
r1206 # This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
source code cleanup: remove trailing white space, normalize file endings
r1203 #
code docs, updates
r903 # This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
source code cleanup: remove trailing white space, normalize file endings
r1203 #
code docs, updates
r903 # You should have received a copy of the GNU General Public License
fixed license issue #149
r1206 # along with this program. If not, see <http://www.gnu.org/licenses/>.
Hacking for git support,and new faster repo scan
r631 import os
import sys
Whoosh logging is now controlled by the .ini files logging setup
r2102 import logging
renamed project to rhodecode
r547 from os.path import dirname as dn, join as jn
Hacking for git support,and new faster repo scan
r631
#to get the rhodecode import
sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
renamed project to rhodecode
r547 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
Implemented filtering of admin journal based on Whoosh Query language...
r3062 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME
renamed project to rhodecode
r547 from whoosh.formats import Characters
moved make-index command to paster_commands module...
r3915 from whoosh.highlight import highlight as whoosh_highlight, HtmlFormatter, ContextFragmenter
utils/conf...
r2109 from rhodecode.lib.utils2 import LazyProperty
renamed project to rhodecode
r547
Indra Talip
create an index for commit messages and the ability to search them and see results
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Custom analyzer: split the text on runs of word characters, then lowercase
# every token.
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()


def _analyzed_text_field():
    """
    Build a fresh stored, scorable field that records character positions
    and is tokenized with :data:`ANALYZER`.

    A new ``FieldType`` is created on every call so that no field instance
    is shared between schemas.
    """
    return FieldType(
        format=Characters(),
        analyzer=ANALYZER,
        scorable=True,
        stored=True,
    )


# Index schema for file contents.
SCHEMA = Schema(
    fileid=ID(unique=True),
    owner=TEXT(),
    repository=TEXT(stored=True),
    path=TEXT(stored=True),
    content=_analyzed_text_field(),
    modtime=STORED(),
    extension=TEXT(stored=True),
)

IDX_NAME = 'HG_INDEX'

# Shared highlighting helpers used when rendering search hits.
FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
FRAGMENTER = ContextFragmenter(200)

# Index schema for changesets (commit messages and their metadata).
CHGSETS_SCHEMA = Schema(
    raw_id=ID(unique=True, stored=True),
    date=NUMERIC(stored=True),
    last=BOOLEAN(),
    owner=TEXT(),
    repository=ID(unique=True, stored=True),
    author=TEXT(stored=True),
    message=_analyzed_text_field(),
    parents=TEXT(),
    added=TEXT(),
    removed=TEXT(),
    changed=TEXT(),
)

CHGSET_IDX_NAME = 'CHGSET_INDEX'

# used only to generate queries in journal
JOURNAL_SCHEMA = Schema(
    username=TEXT(),
    date=DATETIME(),
    action=TEXT(),
    repository=TEXT(),
    ip=TEXT(),
)
fixes issue #524...
r2718
fixes issue #454 Search results under Windows include preceeding backslash
class WhooshResultWrapper(object):
    """
    Lazy, sliceable view over the documents matched by a Whoosh search.

    The wrapper walks the given matcher once (on first access of
    :attr:`doc_ids`), remembers the matched document numbers together with
    the character ranges surrounding every hit, and loads/decorates the
    stored fields of a document only when that document is actually
    requested through iteration, indexing or slicing.

    :param search_type: kind of search performed; ``'content'``, ``'path'``
        and ``'message'`` trigger extra result decoration in
        :meth:`get_full_content`
    :param searcher: object providing ``stored_fields(docnum)``
    :param matcher: matcher providing ``is_active()``, ``id()``, ``next()``,
        ``supports()`` and ``spans()``
    :param highlight_items: terms handed to the highlighter
    :param repo_location: base directory that is joined with a result's
        ``repository`` field to strip the repository prefix from its path
    """

    def __init__(self, search_type, searcher, matcher, highlight_items,
                 repo_location):
        self.search_type = search_type
        self.searcher = searcher
        self.matcher = matcher
        self.highlight_items = highlight_items
        # number of characters of context kept on each side of a hit
        self.fragment_size = 200
        self.repo_location = repo_location

    @LazyProperty
    def doc_ids(self):
        """
        List of ``[docnum, chunks]`` pairs, one per matched document, where
        ``chunks`` is the list of ``(start, end)`` character ranges produced
        by :meth:`get_chunks` for that document.

        Building the list advances the matcher until it is no longer active.
        """
        docs_id = []
        while self.matcher.is_active():
            docnum = self.matcher.id()
            # chunks must be collected before the matcher is advanced,
            # get_chunks() reads the spans of the current document
            chunks = list(self.get_chunks())
            docs_id.append([docnum, chunks])
            self.matcher.next()
        return docs_id

    def __str__(self):
        return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))

    def __repr__(self):
        return self.__str__()

    def __len__(self):
        return len(self.doc_ids)

    def __iter__(self):
        """
        Allows iteration over results, lazily generating the content of
        each result as it is requested.
        """
        for docid in self.doc_ids:
            yield self.get_full_content(docid)

    def __getitem__(self, key):
        """
        Indexing and slicing of the result wrapper.

        :param key: a ``slice`` returns a list of decorated results for the
            selected documents; any other key (e.g. an ``int``) is used to
            index :attr:`doc_ids` directly and returns a single decorated
            result
        """
        if isinstance(key, slice):
            return [self.get_full_content(docid)
                    for docid in self.doc_ids[key]]
        return self.get_full_content(self.doc_ids[key])

    def _relative_path(self, res):
        """
        Return ``res['path']`` with the repository location prefix and any
        leading path separator removed.
        """
        full_repo_path = jn(self.repo_location, res['repository'])
        f_path = res['path'].split(full_repo_path)[-1]
        return f_path.lstrip(os.sep)

    def get_full_content(self, docid):
        """
        Load the stored fields of one matched document and decorate them
        according to the search type.

        :param docid: a ``[docnum, chunks]`` pair taken from :attr:`doc_ids`
        :returns: the stored fields, extended with

            * ``content_short``, ``content_short_hl`` and ``f_path`` for
              ``'content'`` searches,
            * ``f_path`` for ``'path'`` searches,
            * ``message_hl`` for ``'message'`` searches.
        """
        res = self.searcher.stored_fields(docid[0])
        log.debug('result: %s', res)

        if self.search_type == 'content':
            f_path = self._relative_path(res)
            content_short = self.get_short_content(res, docid[1])
            res.update({
                'content_short': content_short,
                'content_short_hl': self.highlight(content_short),
                'f_path': f_path,
            })
        elif self.search_type == 'path':
            res.update({'f_path': self._relative_path(res)})
        elif self.search_type == 'message':
            res.update({'message_hl': self.highlight(res['message'])})

        log.debug('result: %s', res)

        return res

    def get_short_content(self, res, chunks):
        """
        Concatenate the ``(start, end)`` slices of ``res['content']`` given
        by ``chunks`` into a single string.
        """
        return ''.join(res['content'][start:end] for start, end in chunks)

    def get_chunks(self):
        """
        Smart function that implements chunking the content
        but not overlap chunks so it doesn't highlight the same
        close occurrences twice.

        Yields ``(start, end)`` character ranges for the document the
        matcher currently points at; nothing is yielded when the matcher
        does not support positions.
        """
        if not self.matcher.supports('positions'):
            return

        # end offset of the text already covered by previously yielded chunks
        covered_end = 0
        for span in self.matcher.spans():
            start = span.startchar or 0
            end = span.endchar or 0
            start_offseted = max(0, start - self.fragment_size)
            end_offseted = end + self.fragment_size

            # never start inside text that an earlier chunk already covers
            if start_offseted < covered_end:
                start_offseted = covered_end
            # the whole window is already covered: an empty/inverted range
            # adds no text and must not move the covered mark backwards
            if end_offseted <= start_offseted:
                continue

            covered_end = end_offseted
            yield (start_offseted, end_offseted,)

    def highlight(self, content, top=5):
        """
        Return ``content`` with the searched terms highlighted.

        :param content: text to highlight
        :param top: passed through as ``top`` to the Whoosh highlighter
        :returns: highlighted markup, or an empty string when the search
            type is neither ``'content'`` nor ``'message'``
        """
        if self.search_type not in ('content', 'message'):
            return ''
        hl = whoosh_highlight(
            text=content,
            terms=self.highlight_items,
            analyzer=ANALYZER,
            fragmenter=FRAGMENTER,
            formatter=FORMATTER,
            top=top
        )
        return hl