upstream/kallithea Files · rhodecode/lib/indexers/__init__.py

fixed small syntax error in css file

marcink - - Load All Authors

File last commit:

r3960:5293d4bb default


                r4004:6daec3bc

default

Download file

             __init__.py
        
                    194 lines
            
             | 6.2 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / indexers / __init__.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        marcink
    
code docs, updates

              r903
            
      # -*- coding: utf-8 -*-

      """

          rhodecode.lib.indexers.__init__

          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

          Whoosh indexing module for RhodeCode

        marcink
    
source code cleanup: remove trailing white space, normalize file endings

              r1203
            
        marcink
    
code docs, updates

              r903
            
          :created_on: Aug 17, 2010

          :author: marcink

        marcink
    
2012 copyrights

              r1824
            
          :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>

        marcink
    
code docs, updates

              r903
            
          :license: GPLv3, see COPYING for more details.

      """

        marcink
    
fixed license  issue #149

              r1206
            
      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU General Public License as published by

      # the Free Software Foundation, either version 3 of the License, or

      # (at your option) any later version.

        marcink
    
source code cleanup: remove trailing white space, normalize file endings

              r1203
            
      #

        marcink
    
code docs, updates

              r903
            
      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

        marcink
    
source code cleanup: remove trailing white space, normalize file endings

              r1203
            
      #

        marcink
    
code docs, updates

              r903
            
      # You should have received a copy of the GNU General Public License

        marcink
    
fixed license  issue #149

              r1206
            
      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
      import os

      import sys

        marcink
    
Whoosh logging is now controlled by the .ini files logging setup

              r2102
            
      import logging

        marcink
    
renamed project to rhodecode

              r547
            
      from os.path import dirname as dn, join as jn

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
      #to get the rhodecode import

      sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))

        marcink
    
renamed project to rhodecode

              r547
            
      from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter

        marcink
    
Implemented filtering of admin journal based on Whoosh Query language...

              r3062
            
      from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME

        marcink
    
renamed project to rhodecode

              r547
            
      from whoosh.formats import Characters

        marcink
    
moved make-index command to paster_commands module...

              r3915
            
      from whoosh.highlight import highlight as whoosh_highlight, HtmlFormatter, ContextFragmenter

        marcink
    
utils/conf...

              r2109
            
      from rhodecode.lib.utils2 import LazyProperty

        marcink
    
renamed project to rhodecode

              r547
            
        Indra Talip
    
create an index for commit messages and the ability to search them and see results

              r2640
            
      log = logging.getLogger(__name__)

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
      # CUSTOM ANALYZER wordsplit + lowercase filter

        marcink
    
renamed project to rhodecode

              r547
            
      ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()

      #INDEX SCHEMA DEFINITION

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
      SCHEMA = Schema(

        marcink
    
#453 added ID field in whoosh SCHEMA that solves the issue of reindexing modified files

              r2388
            
          fileid=ID(unique=True),

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
          owner=TEXT(),

          repository=TEXT(stored=True),

          path=TEXT(stored=True),

          content=FieldType(format=Characters(), analyzer=ANALYZER,

                            scorable=True, stored=True),

          modtime=STORED(),

          extension=TEXT(stored=True)

      )

        marcink
    
renamed project to rhodecode

              r547
            
      IDX_NAME = 'HG_INDEX'

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
      FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
      FRAGMENTER = ContextFragmenter(200)

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
        Indra Talip
    
create an index for commit messages and the ability to search them and see results

              r2640
            
      CHGSETS_SCHEMA = Schema(

        Indra Talip
    
rename changeset index key to match raw_id rather than path for greater consistency

              r2642
            
          raw_id=ID(unique=True, stored=True),

        marcink
    
Extended commit search schema with date of commit

              r2693
            
          date=NUMERIC(stored=True),

        Indra Talip
    
create an index for commit messages and the ability to search them and see results

              r2640
            
          last=BOOLEAN(),

          owner=TEXT(),

          repository=ID(unique=True, stored=True),

          author=TEXT(stored=True),

          message=FieldType(format=Characters(), analyzer=ANALYZER,

                            scorable=True, stored=True),

          parents=TEXT(),

          added=TEXT(),

          removed=TEXT(),

          changed=TEXT(),

      )

      CHGSET_IDX_NAME = 'CHGSET_INDEX'

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
        marcink
    
Implemented filtering of admin journal based on Whoosh Query language...

              r3062
            
      # Schema used only to generate queries in the journal (it describes the
      # field names and types a journal query may reference).  None of the
      # fields below is declared with ``stored=True``.
      # NOTE(review): presumably no on-disk index is ever built from this
      # schema -- confirm against the journal filtering code.

      JOURNAL_SCHEMA = Schema(

          username=TEXT(),

          date=DATETIME(),

          action=TEXT(),

          repository=TEXT(),

          ip=TEXT(),

      )

        marcink
    
fixes issue #524...

              r2718
            
        marcink
    
fixes issue #454 Search results under Windows include preceeding backslash

              r2319
            
      class WhooshResultWrapper(object):
          """
          Sequence-like wrapper around the documents matched by a Whoosh search.

          The wrapper walks the ``matcher`` once (see :attr:`doc_ids`), remembers
          the matched document numbers together with the character ranges
          surrounding every hit, and builds the full result dictionaries on
          demand when the wrapper is iterated, indexed or sliced.

          :param search_type: kind of search performed; ``'content'``,
              ``'path'`` and ``'message'`` get extra keys added to each result
          :param searcher: object exposing ``stored_fields(docnum)``
          :param matcher: Whoosh matcher positioned on the first match
          :param highlight_items: terms passed to the Whoosh highlighter
          :param repo_location: base directory joined with the stored
              ``repository`` field to strip the prefix off stored file paths
          """

          def __init__(self, search_type, searcher, matcher, highlight_items,
                       repo_location):
              self.search_type = search_type
              self.searcher = searcher
              self.matcher = matcher
              self.highlight_items = highlight_items
              # number of characters of context kept on each side of a hit
              self.fragment_size = 200
              self.repo_location = repo_location

          @LazyProperty
          def doc_ids(self):
              """
              List of ``[docnum, chunks]`` pairs, one per matched document.

              ``chunks`` is the list of ``(start, end)`` character offsets
              produced by :meth:`get_chunks` for that document.  Building the
              list advances ``self.matcher`` until it is no longer active.
              NOTE(review): ``LazyProperty`` presumably caches the value after
              the first access -- the matcher cannot be walked twice.
              """
              collected = []
              matcher = self.matcher
              while matcher.is_active():
                  # chunks must be read before the matcher moves on
                  collected.append([matcher.id(), list(self.get_chunks())])
                  matcher.next()
              return collected

          def __str__(self):
              return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))

          def __repr__(self):
              return self.__str__()

          def __len__(self):
              return len(self.doc_ids)

          def __iter__(self):
              """
              Iterate over all results, building each result dictionary
              lazily via :meth:`get_full_content`.
              """
              for docid in self.doc_ids:
                  yield self.get_full_content(docid)

          def __getitem__(self, key):
              """
              Return the result(s) selected by ``key``.

              :param key: a ``slice`` (returns a list of result dictionaries,
                  honouring an optional step) or an integer index (returns a
                  single result dictionary)
              :raises IndexError: when an integer ``key`` is out of range
              """
              if isinstance(key, slice):
                  return [self.get_full_content(docid)
                          for docid in self.doc_ids[key]]
              return self.get_full_content(self.doc_ids[key])

          def _relative_path(self, res):
              """
              Return the stored ``path`` of ``res`` relative to its repository.
              """
              full_repo_path = jn(self.repo_location, res['repository'])
              # split only once: the repository path may legitimately occur
              # again deeper inside the file path
              f_path = res['path'].split(full_repo_path, 1)[-1]
              return f_path.lstrip(os.sep)

          def get_full_content(self, docid):
              """
              Build the result dictionary for one entry of :attr:`doc_ids`.

              :param docid: ``[docnum, chunks]`` pair
              :returns: the stored fields of the document, extended with

                  * ``content_short``, ``content_short_hl`` and ``f_path`` for
                    ``'content'`` searches,
                  * ``f_path`` for ``'path'`` searches,
                  * ``message_hl`` for ``'message'`` searches.
              """
              res = self.searcher.stored_fields(docid[0])
              log.debug('result: %s', res)
              if self.search_type == 'content':
                  f_path = self._relative_path(res)
                  content_short = self.get_short_content(res, docid[1])
                  res.update({'content_short': content_short,
                              'content_short_hl': self.highlight(content_short),
                              'f_path': f_path})
              elif self.search_type == 'path':
                  res.update({'f_path': self._relative_path(res)})
              elif self.search_type == 'message':
                  res.update({'message_hl': self.highlight(res['message'])})
              log.debug('result: %s', res)

              return res

          def get_short_content(self, res, chunks):
              """
              Concatenate the ``res['content']`` slices described by ``chunks``.

              :param res: result dictionary holding the ``content`` field
              :param chunks: iterable of ``(start, end)`` character offsets
              """
              content = res['content']
              return ''.join([content[chunk[0]:chunk[1]] for chunk in chunks])

          def get_chunks(self):
              """
              Yield ``(start, end)`` character offsets around every hit of the
              document the matcher currently points at.

              Each hit is widened by ``fragment_size`` characters on both sides;
              the start of a chunk is pushed forward to the end of the previous
              one so that close occurrences are not included twice.  Nothing is
              yielded when the matcher does not support positions.
              """
              if not self.matcher.supports('positions'):
                  return

              previous_end = 0
              for span in self.matcher.spans():
                  start = span.startchar or 0
                  end = span.endchar or 0
                  start_offseted = max(0, start - self.fragment_size)
                  end_offseted = end + self.fragment_size

                  # never start before the previous chunk ended
                  if start_offseted < previous_end:
                      start_offseted = previous_end
                  previous_end = end_offseted
                  yield (start_offseted, end_offseted,)

          def highlight(self, content, top=5):
              """
              Return ``content`` with the search terms highlighted by Whoosh.

              :param content: text to highlight
              :param top: value forwarded as ``top`` to the Whoosh highlighter
              :returns: highlighted markup, or ``''`` when the search type is
                  neither ``'content'`` nor ``'message'``
              """
              if self.search_type not in ('content', 'message'):
                  return ''
              return whoosh_highlight(
                  text=content,
                  terms=self.highlight_items,
                  analyzer=ANALYZER,
                  fragmenter=FRAGMENTER,
                  formatter=FORMATTER,
                  top=top
              )

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

marcink code docs, updates	r903	# -- coding: utf-8 --
		"""
		rhodecode.lib.indexers.__init__
		~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

		Whoosh indexing module for RhodeCode
marcink source code cleanup: remove trailing white space, normalize file endings	r1203
marcink code docs, updates	r903	:created_on: Aug 17, 2010
		:author: marcink
marcink 2012 copyrights	r1824	:copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
marcink code docs, updates	r903	:license: GPLv3, see COPYING for more details.
		"""
marcink fixed license issue #149	r1206	# This program is free software: you can redistribute it and/or modify
		# it under the terms of the GNU General Public License as published by
		# the Free Software Foundation, either version 3 of the License, or
		# (at your option) any later version.
marcink source code cleanup: remove trailing white space, normalize file endings	r1203	#
marcink code docs, updates	r903	# This program is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		# GNU General Public License for more details.
marcink source code cleanup: remove trailing white space, normalize file endings	r1203	#
marcink code docs, updates	r903	# You should have received a copy of the GNU General Public License
marcink fixed license issue #149	r1206	# along with this program. If not, see <http://www.gnu.org/licenses/>.
marcink Hacking for git support,and new faster repo scan	r631	import os
		import sys
marcink Whoosh logging is now controlled by the .ini files logging setup	r2102	import logging
marcink renamed project to rhodecode	r547	from os.path import dirname as dn, join as jn
marcink Hacking for git support,and new faster repo scan	r631
		#to get the rhodecode import
		sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))

marcink renamed project to rhodecode	r547	from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
marcink Implemented filtering of admin journal based on Whoosh Query language...	r3062	from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType, DATETIME
marcink renamed project to rhodecode	r547	from whoosh.formats import Characters
marcink moved make-index command to paster_commands module...	r3915	from whoosh.highlight import highlight as whoosh_highlight, HtmlFormatter, ContextFragmenter
marcink utils/conf...	r2109	from rhodecode.lib.utils2 import LazyProperty
marcink renamed project to rhodecode	r547
Indra Talip create an index for commit messages and the ability to search them and see results	r2640	log = logging.getLogger(__name__)

marcink bumbed whoosh to 2.3.X series...	r1995	# CUSTOM ANALYZER wordsplit + lowercase filter
marcink renamed project to rhodecode	r547	ANALYZER = RegexTokenizer(expression=r"\w+") \| LowercaseFilter()

		#INDEX SCHEMA DEFINITION
marcink bumbed whoosh to 2.3.X series...	r1995	SCHEMA = Schema(
marcink #453 added ID field in whoosh SCHEMA that solves the issue of reindexing modified files	r2388	fileid=ID(unique=True),
marcink bumbed whoosh to 2.3.X series...	r1995	owner=TEXT(),
		repository=TEXT(stored=True),
		path=TEXT(stored=True),
		content=FieldType(format=Characters(), analyzer=ANALYZER,
		scorable=True, stored=True),
		modtime=STORED(),
		extension=TEXT(stored=True)
		)
marcink renamed project to rhodecode	r547
		IDX_NAME = 'HG_INDEX'
marcink Hacking for git support,and new faster repo scan	r631	FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
marcink bumbed whoosh to 2.3.X series...	r1995	FRAGMENTER = ContextFragmenter(200)
marcink Hacking for git support,and new faster repo scan	r631
Indra Talip create an index for commit messages and the ability to search them and see results	r2640	CHGSETS_SCHEMA = Schema(
Indra Talip rename changeset index key to match raw_id rather than path for greater consistency	r2642	raw_id=ID(unique=True, stored=True),
marcink Extended commit search schema with date of commit	r2693	date=NUMERIC(stored=True),
Indra Talip create an index for commit messages and the ability to search them and see results	r2640	last=BOOLEAN(),
		owner=TEXT(),
		repository=ID(unique=True, stored=True),
		author=TEXT(stored=True),
		message=FieldType(format=Characters(), analyzer=ANALYZER,
		scorable=True, stored=True),
		parents=TEXT(),
		added=TEXT(),
		removed=TEXT(),
		changed=TEXT(),
		)

		CHGSET_IDX_NAME = 'CHGSET_INDEX'
marcink Hacking for git support,and new faster repo scan	r631
marcink Implemented filtering of admin journal based on Whoosh Query language...	r3062	# used only to generate queries in journal
		JOURNAL_SCHEMA = Schema(
		username=TEXT(),
		date=DATETIME(),
		action=TEXT(),
		repository=TEXT(),
		ip=TEXT(),
		)

marcink fixes issue #524...	r2718
marcink fixes issue #454 Search results under Windows include preceeding backslash	r2319	class WhooshResultWrapper(object):
		def __init__(self, search_type, searcher, matcher, highlight_items,
		repo_location):
marcink Added searching for file names within the repository in rhodecode	r556	self.search_type = search_type
marcink renamed project to rhodecode	r547	self.searcher = searcher
		self.matcher = matcher
		self.highlight_items = highlight_items
marcink bumbed whoosh to 2.3.X series...	r1995	self.fragment_size = 200
marcink fixes issue #454 Search results under Windows include preceeding backslash	r2319	self.repo_location = repo_location
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	@LazyProperty
		def doc_ids(self):
		docs_id = []
		while self.matcher.is_active():
		docnum = self.matcher.id()
		chunks = [offsets for offsets in self.get_chunks()]
		docs_id.append([docnum, chunks])
		self.matcher.next()
marcink Hacking for git support,and new faster repo scan	r631	return docs_id

marcink renamed project to rhodecode	r547	def __str__(self):
		return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))

		def __repr__(self):
		return self.__str__()

		def __len__(self):
		return len(self.doc_ids)

		def __iter__(self):
		"""
		Allows Iteration over results,and lazy generate content

		Requires implementation of ``__getitem__`` method.
		"""
		for docid in self.doc_ids:
		yield self.get_full_content(docid)

marcink fixed issue with latest webhelpers pagination module	r1198	def __getitem__(self, key):
marcink renamed project to rhodecode	r547	"""
		Slicing of resultWrapper
		"""
marcink fixed issue with latest webhelpers pagination module	r1198	i, j = key.start, key.stop

marcink bumbed whoosh to 2.3.X series...	r1995	slices = []
marcink renamed project to rhodecode	r547	for docid in self.doc_ids[i:j]:
marcink bumbed whoosh to 2.3.X series...	r1995	slices.append(self.get_full_content(docid))
		return slices
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	def get_full_content(self, docid):
		res = self.searcher.stored_fields(docid[0])
Indra Talip create an index for commit messages and the ability to search them and see results	r2640	log.debug('result: %s' % res)
		if self.search_type == 'content':
Indra Talip rename changeset index key to match raw_id rather than path for greater consistency	r2642	full_repo_path = jn(self.repo_location, res['repository'])
		f_path = res['path'].split(full_repo_path)[-1]
		f_path = f_path.lstrip(os.sep)
Indra Talip create an index for commit messages and the ability to search them and see results	r2640	content_short = self.get_short_content(res, docid[1])
		res.update({'content_short': content_short,
Indra Talip rename changeset index key to match raw_id rather than path for greater consistency	r2642	'content_short_hl': self.highlight(content_short),
		'f_path': f_path
		})
marcink fixes issue #524...	r2718	elif self.search_type == 'path':
		full_repo_path = jn(self.repo_location, res['repository'])
		f_path = res['path'].split(full_repo_path)[-1]
		f_path = f_path.lstrip(os.sep)
		res.update({'f_path': f_path})
Indra Talip create an index for commit messages and the ability to search them and see results	r2640	elif self.search_type == 'message':
		res.update({'message_hl': self.highlight(res['message'])})

		log.debug('result: %s' % res)
marcink Hacking for git support,and new faster repo scan	r631
		return res

marcink renamed project to rhodecode	r547	def get_short_content(self, res, chunks):
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	def get_chunks(self):
		"""
		Smart function that implements chunking the content
		but not overlap chunks so it doesn't highlight the same
marcink Added searching for file names within the repository in rhodecode	r556	close occurrences twice.
marcink renamed project to rhodecode	r547	"""
		memory = [(0, 0)]
marcink white space cleanup	r2673	if self.matcher.supports('positions'):
Indra Talip create an index for commit messages and the ability to search them and see results	r2640	for span in self.matcher.spans():
		start = span.startchar or 0
		end = span.endchar or 0
		start_offseted = max(0, start - self.fragment_size)
		end_offseted = end + self.fragment_size
marcink Hacking for git support,and new faster repo scan	r631
Indra Talip create an index for commit messages and the ability to search them and see results	r2640	if start_offseted < memory[-1][1]:
		start_offseted = memory[-1][1]
		memory.append((start_offseted, end_offseted,))
		yield (start_offseted, end_offseted,)
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	def highlight(self, content, top=5):
Indra Talip create an index for commit messages and the ability to search them and see results	r2640	if self.search_type not in ['content', 'message']:
marcink Added searching for file names within the repository in rhodecode	r556	return ''
marcink moved make-index command to paster_commands module...	r3915	hl = whoosh_highlight(
marcink UI fixes for searching	r2389	text=content,
marcink bumbed whoosh to 2.3.X series...	r1995	terms=self.highlight_items,
		analyzer=ANALYZER,
		fragmenter=FRAGMENTER,
		formatter=FORMATTER,
		top=top
		)
marcink Hacking for git support,and new faster repo scan	r631	return hl