upstream/kallithea Files · rhodecode/lib/indexers/__init__.py

reset charset for git rpc calls also

marcink - - Load All Authors

File last commit:

r2389:324b8382 beta


                r2581:ee980ead

beta

Download file

             __init__.py
        
                    241 lines
            
             | 8.4 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / indexers / __init__.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        marcink
    
code docs, updates

              r903
            
      # -*- coding: utf-8 -*-

      """

          rhodecode.lib.indexers.__init__

          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

          Whoosh indexing module for RhodeCode

        marcink
    
source code cleanup: remove trailing white space, normalize file endings

              r1203
            
        marcink
    
code docs, updates

              r903
            
          :created_on: Aug 17, 2010

          :author: marcink

        marcink
    
2012 copyrights

              r1824
            
          :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>

        marcink
    
code docs, updates

              r903
            
          :license: GPLv3, see COPYING for more details.

      """

        marcink
    
fixed license  issue #149

              r1206
            
      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU General Public License as published by

      # the Free Software Foundation, either version 3 of the License, or

      # (at your option) any later version.

        marcink
    
source code cleanup: remove trailing white space, normalize file endings

              r1203
            
      #

        marcink
    
code docs, updates

              r903
            
      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

        marcink
    
source code cleanup: remove trailing white space, normalize file endings

              r1203
            
      #

        marcink
    
code docs, updates

              r903
            
      # You should have received a copy of the GNU General Public License

        marcink
    
fixed license  issue #149

              r1206
            
      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
      import os

      import sys

        marcink
    
complete rewrite of paster commands,...

              r785
            
      import traceback

        marcink
    
Whoosh logging is now controlled by the .ini files logging setup

              r2102
            
      import logging

        marcink
    
renamed project to rhodecode

              r547
            
      from os.path import dirname as dn, join as jn

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
      #to get the rhodecode import

      sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))

        marcink
    
fixes #90 + docs update

              r894
            
      from string import strip

        marcink
    
renamed project to rhodecode

              r547
            
      from shutil import rmtree

        marcink
    
complete rewrite of paster commands,...

              r785
            
        marcink
    
renamed project to rhodecode

              r547
            
      from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter

      from whoosh.fields import TEXT, ID, STORED, Schema, FieldType

      from whoosh.index import create_in, open_dir

      from whoosh.formats import Characters

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
      from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter

        marcink
    
renamed project to rhodecode

              r547
            
        marcink
    
UI fixes for searching

              r2389
            
      from webhelpers.html.builder import escape, literal

        marcink
    
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map

              r1302
            
      from sqlalchemy import engine_from_config

      from rhodecode.model import init_model

      from rhodecode.model.scm import ScmModel

        Jared Bunting
    
Allowing indexing job to resolve repos path on its own if not given.

              r1407
            
      from rhodecode.model.repo import RepoModel

        marcink
    
moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map

              r1302
            
      from rhodecode.config.environment import load_environment

        marcink
    
utils/conf...

              r2109
            
      from rhodecode.lib.utils2 import LazyProperty

      from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\

          load_rcextensions

        marcink
    
renamed project to rhodecode

              r547
            
        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
      # CUSTOM ANALYZER wordsplit + lowercase filter

        marcink
    
renamed project to rhodecode

              r547
            
      ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()

      #INDEX SCHEMA DEFINITION

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
      # Whoosh schema of the index: one document per indexed file.
      SCHEMA = Schema(

        marcink
    
#453 added ID field in whoosh SCHEMA that solves the issue of reindexing modified files

              r2388
            
          # unique identifier of the indexed file
          fileid=ID(unique=True),

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
          # text field; no ``stored`` flag is passed for it
          owner=TEXT(),

          # text fields whose values are stored with the document;
          # WhooshResultWrapper.get_full_content reads 'repository' and 'path'
          repository=TEXT(stored=True),

          path=TEXT(stored=True),

          # custom field using the Characters format and the module ANALYZER;
          # stored, so WhooshResultWrapper.get_short_content can slice it
          content=FieldType(format=Characters(), analyzer=ANALYZER,

                            scorable=True, stored=True),

          # stored-only value
          modtime=STORED(),

          extension=TEXT(stored=True)

      )

        marcink
    
renamed project to rhodecode

              r547
            
      # name constant for the index; presumably the on-disk index name used
      # by the indexing daemon -- its use is not visible in this module
      IDX_NAME = 'HG_INDEX'

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
      # formatter handed to whoosh's highlight() in WhooshResultWrapper.highlight:
      # 'span' tag, with a "break" span inserted between fragments
      FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
      # fragmenter handed to whoosh's highlight(); built with 200, the same
      # value as WhooshResultWrapper.fragment_size
      FRAGMENTER = ContextFragmenter(200)

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
        marcink
    
complete rewrite of paster commands,...

              r785
            
      class MakeIndex(BasePasterCommand):

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
          max_args = 1

          min_args = 1

          usage = "CONFIG_FILE"

          summary = "Creates index for full text search given configuration file"

        marcink
    
Implemented whoosh index building as paster command....

              r683
            
          group_name = "RhodeCode"

          takes_config_file = -1

        marcink
    
complete rewrite of paster commands,...

              r785
            
          parser = Command.standard_parser(verbose=True)

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
          def command(self):
              """
              Build or update the Whoosh full text index.

              Sets up logging from the .ini file, initialises the model with an
              engine built from the ``sqlalchemy.db1.`` settings, works out
              which repositories to index from the command line options and
              runs ``WhooshIndexingDaemon`` while holding the
              ``make_index.lock`` lock.

              Exits the process with status 1 when ``LockHeld`` is raised.
              """
              # ``import logging`` alone does not load the ``logging.config``
              # submodule, so import it explicitly before calling fileConfig
              import logging.config
              logging.config.fileConfig(self.path_to_ini_file)
              from pylons import config
              add_cache(config)
              engine = engine_from_config(config, 'sqlalchemy.db1.')
              init_model(engine)
              index_location = config['index_dir']
              # fall back to the configured repositories path when no explicit
              # --repo-location was given
              repo_location = self.options.repo_location \
                  if self.options.repo_location else RepoModel().repos_path
              # comma separated option values become lists of stripped names
              repo_list = [name.strip()
                           for name in self.options.repo_list.split(',')] \
                  if self.options.repo_list else None
              repo_update_list = [name.strip()
                                  for name in
                                  self.options.repo_update_list.split(',')] \
                  if self.options.repo_update_list else None
              load_rcextensions(config['here'])
              #======================================================================
              # WHOOSH DAEMON
              #======================================================================
              from rhodecode.lib.pidlock import LockHeld, DaemonLock
              from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
              try:
                  # the lock file lives two directory levels above the index dir
                  lock = DaemonLock(file_=jn(dn(dn(index_location)),
                                             'make_index.lock'))
                  try:
                      WhooshIndexingDaemon(index_location=index_location,
                                           repo_location=repo_location,
                                           repo_list=repo_list,
                                           repo_update_list=repo_update_list)\
                          .run(full_index=self.options.full_index)
                  finally:
                      # release the lock even when indexing fails, so a crashed
                      # run does not block the next one
                      lock.release()
              except LockHeld:
                  sys.exit(1)

        marcink
    
complete rewrite of paster commands,...

              r785
            
          def update_parser(self):
              """
              Register the command line options of the indexing command on
              ``self.parser``.

              Options added: ``--repo-location``, ``--index-only``,
              ``--update-only`` (all storing a string) and the ``-f`` flag
              (stored as ``full_index``, default ``False``).
              """
              self.parser.add_option('--repo-location',
                                action='store',
                                dest='repo_location',
                                help="Specifies repositories location to index OPTIONAL",
                                )
              self.parser.add_option('--index-only',
                                action='store',
                                dest='repo_list',
                                help="Specifies a comma separated list of repositories "
                                      "to build index on. If not given all repositories "
                                      "are scanned for indexing. OPTIONAL",
                                )
              self.parser.add_option('--update-only',
                                action='store',
                                dest='repo_update_list',
                                help="Specifies a comma separated list of repositories "
                                      "to re-build index on. OPTIONAL",
                                )
              self.parser.add_option('-f',
                                action='store_true',
                                dest='full_index',
                                help="Specifies that index should be made full i.e."
                                      " destroy old and build from scratch",
                                default=False)

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
        marcink
    
implements #330 api method for listing nodes at particular revision...

              r1810
            
        marcink
    
fixes issue #454 Search results under Windows include preceding backslash

              r2319
            
      class WhooshResultWrapper(object):

          def __init__(self, search_type, searcher, matcher, highlight_items,

                       repo_location):

        marcink
    
Added searching for file names within the repository in rhodecode

              r556
            
              self.search_type = search_type

        marcink
    
renamed project to rhodecode

              r547
            
              self.searcher = searcher

              self.matcher = matcher

              self.highlight_items = highlight_items

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
              self.fragment_size = 200

        marcink
    
fixes issue #454 Search results under Windows include preceeding backslash

              r2319
            
              self.repo_location = repo_location

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
        marcink
    
renamed project to rhodecode

              r547
            
          @LazyProperty
          def doc_ids(self):
              """
              Walk the matcher and collect one ``[docnum, chunks]`` pair per
              matched document, where ``chunks`` is the list produced by
              ``get_chunks`` for that document.

              The matcher is advanced until it is no longer active.
              NOTE(review): presumably ``LazyProperty`` caches the result so
              the matcher is only walked once -- confirm in utils2.
              """
              collected = []
              while self.matcher.is_active():
                  number = self.matcher.id()
                  # chunks have to be read before the matcher moves on
                  collected.append([number, list(self.get_chunks())])
                  self.matcher.next()
              return collected

        marcink
    
renamed project to rhodecode

              r547
            
          def __str__(self):

              return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))

          def __repr__(self):

              return self.__str__()

          def __len__(self):

              return len(self.doc_ids)

          def __iter__(self):

              """

              Allows Iteration over results,and lazy generate content

              *Requires* implementation of ``__getitem__`` method.

              """

              for docid in self.doc_ids:

                  yield self.get_full_content(docid)

        marcink
    
fixed issue with latest webhelpers pagination module

              r1198
            
          def __getitem__(self, key):

        marcink
    
renamed project to rhodecode

              r547
            
              """

              Slicing of resultWrapper

              """

        marcink
    
fixed issue with latest webhelpers pagination module

              r1198
            
              i, j = key.start, key.stop

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
              slices = []

        marcink
    
renamed project to rhodecode

              r547
            
              for docid in self.doc_ids[i:j]:

        marcink
    
bumbed whoosh to 2.3.X series...

              r1995
            
                  slices.append(self.get_full_content(docid))

              return slices

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
        marcink
    
renamed project to rhodecode

              r547
            
          def get_full_content(self, docid):
              """
              Build the result entry for one matched document.

              :param docid: ``[docnum, chunks]`` pair as collected by
                  ``doc_ids``
              :returns: the stored fields of the document, extended with
                  ``content_short`` (text of the chunks), ``content_short_hl``
                  (its highlighted version) and ``f_path`` (stored path
                  without the repository location prefix)
              """
              res = self.searcher.stored_fields(docid[0])
              full_repo_path = jn(self.repo_location, res['repository'])
              # split on the first occurrence only: a file path that repeats
              # the repository path further down must not be cut again
              f_path = res['path'].split(full_repo_path, 1)[-1]
              # drop the path separator left in front of the relative path
              f_path = f_path.lstrip(os.sep)

              content_short = self.get_short_content(res, docid[1])
              res.update({'content_short': content_short,
                          'content_short_hl': self.highlight(content_short),
                          'f_path': f_path})

              return res

        marcink
    
renamed project to rhodecode

              r547
            
          def get_short_content(self, res, chunks):
              """
              Concatenate the slices of ``res['content']`` described by chunks.

              :param res: mapping holding the document text under ``content``
              :param chunks: iterable of sequences whose first two items are
                  the start and end offsets of a slice
              """
              pieces = []
              for bounds in chunks:
                  begin, end = bounds[0], bounds[1]
                  pieces.append(res['content'][begin:end])
              return ''.join(pieces)

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
        marcink
    
renamed project to rhodecode

              r547
            
          def get_chunks(self):
              """
              Yield ``(start, end)`` character offsets for every span of the
              current match, widened by ``fragment_size`` on both sides.

              A chunk never starts before the end of the chunk yielded right
              before it, so close occurrences are not highlighted twice.
              """
              previous_end = 0
              for span in self.matcher.spans():
                  # a missing (falsy) offset counts as 0
                  span_start = span.startchar or 0
                  span_end = span.endchar or 0

                  # widen to the left, but not below 0 and not into the
                  # previously yielded chunk
                  chunk_start = max(0, span_start - self.fragment_size)
                  chunk_start = max(chunk_start, previous_end)
                  chunk_end = span_end + self.fragment_size

                  previous_end = chunk_end
                  yield (chunk_start, chunk_end)

        marcink
    
renamed project to rhodecode

              r547
            
          def highlight(self, content, top=5):
              """
              Highlight the searched terms inside ``content``.

              Only searches of type ``'content'`` are highlighted; any other
              search type gets an empty string back.

              :param content: text to run the highlighter on
              :param top: passed on as ``top`` to whoosh's ``highlight``
              """
              if self.search_type == 'content':
                  return highlight(
                      text=content,
                      terms=self.highlight_items,
                      analyzer=ANALYZER,
                      fragmenter=FRAGMENTER,
                      formatter=FORMATTER,
                      top=top
                  )
              return ''

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

marcink code docs, updates	r903	# -- coding: utf-8 --
		"""
		rhodecode.lib.indexers.__init__
		~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

		Whoosh indexing module for RhodeCode
marcink source code cleanup: remove trailing white space, normalize file endings	r1203
marcink code docs, updates	r903	:created_on: Aug 17, 2010
		:author: marcink
marcink 2012 copyrights	r1824	:copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
marcink code docs, updates	r903	:license: GPLv3, see COPYING for more details.
		"""
marcink fixed license issue #149	r1206	# This program is free software: you can redistribute it and/or modify
		# it under the terms of the GNU General Public License as published by
		# the Free Software Foundation, either version 3 of the License, or
		# (at your option) any later version.
marcink source code cleanup: remove trailing white space, normalize file endings	r1203	#
marcink code docs, updates	r903	# This program is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		# GNU General Public License for more details.
marcink source code cleanup: remove trailing white space, normalize file endings	r1203	#
marcink code docs, updates	r903	# You should have received a copy of the GNU General Public License
marcink fixed license issue #149	r1206	# along with this program. If not, see <http://www.gnu.org/licenses/>.
marcink Hacking for git support,and new faster repo scan	r631	import os
		import sys
marcink complete rewrite of paster commands,...	r785	import traceback
marcink Whoosh logging is now controlled by the .ini files logging setup	r2102	import logging
marcink renamed project to rhodecode	r547	from os.path import dirname as dn, join as jn
marcink Hacking for git support,and new faster repo scan	r631
		#to get the rhodecode import
		sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))

marcink fixes #90 + docs update	r894	from string import strip
marcink renamed project to rhodecode	r547	from shutil import rmtree
marcink complete rewrite of paster commands,...	r785
marcink renamed project to rhodecode	r547	from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
		from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
		from whoosh.index import create_in, open_dir
		from whoosh.formats import Characters
marcink bumbed whoosh to 2.3.X series...	r1995	from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
marcink renamed project to rhodecode	r547
marcink UI fixes for searching	r2389	from webhelpers.html.builder import escape, literal
marcink moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map	r1302	from sqlalchemy import engine_from_config

		from rhodecode.model import init_model
		from rhodecode.model.scm import ScmModel
Jared Bunting Allowing indexing job to resolve repos path on its own if not given.	r1407	from rhodecode.model.repo import RepoModel
marcink moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map	r1302	from rhodecode.config.environment import load_environment
marcink utils/conf...	r2109	from rhodecode.lib.utils2 import LazyProperty
		from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\
		load_rcextensions
marcink renamed project to rhodecode	r547
marcink bumbed whoosh to 2.3.X series...	r1995	# CUSTOM ANALYZER wordsplit + lowercase filter
marcink renamed project to rhodecode	r547	ANALYZER = RegexTokenizer(expression=r"\w+") \| LowercaseFilter()


		#INDEX SCHEMA DEFINITION
marcink bumbed whoosh to 2.3.X series...	r1995	SCHEMA = Schema(
marcink #453 added ID field in whoosh SCHEMA that solves the issue of reindexing modified files	r2388	fileid=ID(unique=True),
marcink bumbed whoosh to 2.3.X series...	r1995	owner=TEXT(),
		repository=TEXT(stored=True),
		path=TEXT(stored=True),
		content=FieldType(format=Characters(), analyzer=ANALYZER,
		scorable=True, stored=True),
		modtime=STORED(),
		extension=TEXT(stored=True)
		)
marcink renamed project to rhodecode	r547
		IDX_NAME = 'HG_INDEX'
marcink Hacking for git support,and new faster repo scan	r631	FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
marcink bumbed whoosh to 2.3.X series...	r1995	FRAGMENTER = ContextFragmenter(200)
marcink Hacking for git support,and new faster repo scan	r631

marcink complete rewrite of paster commands,...	r785	class MakeIndex(BasePasterCommand):
marcink Hacking for git support,and new faster repo scan	r631
		max_args = 1
		min_args = 1

		usage = "CONFIG_FILE"
		summary = "Creates index for full text search given configuration file"
marcink Implemented whoosh index building as paster command....	r683	group_name = "RhodeCode"
		takes_config_file = -1
marcink complete rewrite of paster commands,...	r785	parser = Command.standard_parser(verbose=True)

marcink Hacking for git support,and new faster repo scan	r631	def command(self):
marcink Whoosh logging is now controlled by the .ini files logging setup	r2102	logging.config.fileConfig(self.path_to_ini_file)
marcink complete rewrite of paster commands,...	r785	from pylons import config
		add_cache(config)
		engine = engine_from_config(config, 'sqlalchemy.db1.')
		init_model(engine)
		index_location = config['index_dir']
marcink Updated contributors and fixed index line length	r1409	repo_location = self.options.repo_location \
		if self.options.repo_location else RepoModel().repos_path
marcink fixes issue #146	r1183	repo_list = map(strip, self.options.repo_list.split(',')) \
		if self.options.repo_list else None
marcink #469 added --update-only option to whoosh to re-index only given list...	r2373	repo_update_list = map(strip, self.options.repo_update_list.split(',')) \
		if self.options.repo_update_list else None
marcink utils/conf...	r2109	load_rcextensions(config['here'])
marcink Implemented whoosh index building as paster command....	r683	#======================================================================
marcink Hacking for git support,and new faster repo scan	r631	# WHOOSH DAEMON
marcink Implemented whoosh index building as paster command....	r683	#======================================================================
marcink Hacking for git support,and new faster repo scan	r631	from rhodecode.lib.pidlock import LockHeld, DaemonLock
		from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
		try:
marcink fixes #258 RhodeCode 1.2 assumes egg folder is writable	r1540	l = DaemonLock(file_=jn(dn(dn(index_location)), 'make_index.lock'))
marcink Implemented whoosh index building as paster command....	r683	WhooshIndexingDaemon(index_location=index_location,
marcink fixes #90 + docs update	r894	repo_location=repo_location,
marcink #469 added --update-only option to whoosh to re-index only given list...	r2373	repo_list=repo_list,
		repo_update_list=repo_update_list)\
marcink Hacking for git support,and new faster repo scan	r631	.run(full_index=self.options.full_index)
		l.release()
		except LockHeld:
		sys.exit(1)

marcink complete rewrite of paster commands,...	r785	def update_parser(self):
		self.parser.add_option('--repo-location',
		action='store',
		dest='repo_location',
Jared Bunting Adding documentation for indexer's self-resolving repos location.	r1408	help="Specifies repositories location to index OPTIONAL",
marcink complete rewrite of paster commands,...	r785	)
marcink fixes #90 + docs update	r894	self.parser.add_option('--index-only',
		action='store',
		dest='repo_list',
		help="Specifies a comma separated list of repositores "
marcink #469 added --update-only option to whoosh to re-index only given list...	r2373	"to build index on. If not given all repositories "
		"are scanned for indexing. OPTIONAL",
		)
		self.parser.add_option('--update-only',
		action='store',
		dest='repo_update_list',
		help="Specifies a comma separated list of repositores "
		"to re-build index on. OPTIONAL",
marcink fixes #90 + docs update	r894	)
marcink complete rewrite of paster commands,...	r785	self.parser.add_option('-f',
		action='store_true',
		dest='full_index',
		help="Specifies that index should be made full i.e"
		" destroy old and build from scratch",
		default=False)
marcink Hacking for git support,and new faster repo scan	r631
marcink implements #330 api method for listing nodes at particular revision...	r1810
marcink fixes issue #454 Search results under Windows include preceeding backslash	r2319	class WhooshResultWrapper(object):
		def __init__(self, search_type, searcher, matcher, highlight_items,
		repo_location):
marcink Added searching for file names within the repository in rhodecode	r556	self.search_type = search_type
marcink renamed project to rhodecode	r547	self.searcher = searcher
		self.matcher = matcher
		self.highlight_items = highlight_items
marcink bumbed whoosh to 2.3.X series...	r1995	self.fragment_size = 200
marcink fixes issue #454 Search results under Windows include preceeding backslash	r2319	self.repo_location = repo_location
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	@LazyProperty
		def doc_ids(self):
		docs_id = []
		while self.matcher.is_active():
		docnum = self.matcher.id()
		chunks = [offsets for offsets in self.get_chunks()]
		docs_id.append([docnum, chunks])
		self.matcher.next()
marcink Hacking for git support,and new faster repo scan	r631	return docs_id

marcink renamed project to rhodecode	r547	def __str__(self):
		return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))

		def __repr__(self):
		return self.__str__()

		def __len__(self):
		return len(self.doc_ids)

		def __iter__(self):
		"""
		Allows Iteration over results,and lazy generate content

		Requires implementation of ``__getitem__`` method.
		"""
		for docid in self.doc_ids:
		yield self.get_full_content(docid)

marcink fixed issue with latest webhelpers pagination module	r1198	def __getitem__(self, key):
marcink renamed project to rhodecode	r547	"""
		Slicing of resultWrapper
		"""
marcink fixed issue with latest webhelpers pagination module	r1198	i, j = key.start, key.stop

marcink bumbed whoosh to 2.3.X series...	r1995	slices = []
marcink renamed project to rhodecode	r547	for docid in self.doc_ids[i:j]:
marcink bumbed whoosh to 2.3.X series...	r1995	slices.append(self.get_full_content(docid))
		return slices
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	def get_full_content(self, docid):
		res = self.searcher.stored_fields(docid[0])
marcink fixes issue #454 Search results under Windows include preceeding backslash	r2319	full_repo_path = jn(self.repo_location, res['repository'])
		f_path = res['path'].split(full_repo_path)[-1]
		f_path = f_path.lstrip(os.sep)
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	content_short = self.get_short_content(res, docid[1])
marcink bumbed whoosh to 2.3.X series...	r1995	res.update({'content_short': content_short,
		'content_short_hl': self.highlight(content_short),
		'f_path': f_path})
marcink Hacking for git support,and new faster repo scan	r631
		return res

marcink renamed project to rhodecode	r547	def get_short_content(self, res, chunks):
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	def get_chunks(self):
		"""
		Smart function that implements chunking the content
		but not overlap chunks so it doesn't highlight the same
marcink Added searching for file names within the repository in rhodecode	r556	close occurrences twice.
marcink implements #330 api method for listing nodes at particular revision...	r1810
marcink moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map	r1302	:param matcher:
		:param size:
marcink renamed project to rhodecode	r547	"""
		memory = [(0, 0)]
		for span in self.matcher.spans():
		start = span.startchar or 0
		end = span.endchar or 0
		start_offseted = max(0, start - self.fragment_size)
		end_offseted = end + self.fragment_size
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	if start_offseted < memory[-1][1]:
		start_offseted = memory[-1][1]
marcink Hacking for git support,and new faster repo scan	r631	memory.append((start_offseted, end_offseted,))
		yield (start_offseted, end_offseted,)

marcink renamed project to rhodecode	r547	def highlight(self, content, top=5):
marcink Added searching for file names within the repository in rhodecode	r556	if self.search_type != 'content':
		return ''
marcink bumbed whoosh to 2.3.X series...	r1995	hl = highlight(
marcink UI fixes for searching	r2389	text=content,
marcink bumbed whoosh to 2.3.X series...	r1995	terms=self.highlight_items,
		analyzer=ANALYZER,
		fragmenter=FRAGMENTER,
		formatter=FORMATTER,
		top=top
		)
marcink Hacking for git support,and new faster repo scan	r631	return hl