upstream/kallithea Files · rhodecode/lib/indexers/daemon.py

fixed allowed '.' character in username

marcink - - Load All Authors

File last commit:

r947:99850ac8 beta


                r960:029e69f0

beta

Download file

             daemon.py
        
                    237 lines
            
             | 8.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / indexers / daemon.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        marcink
    
fixes to #92, updated changelog

              r885
            
      # -*- coding: utf-8 -*-

      """

          rhodecode.lib.indexers.daemon

          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

          A daemon will read from task table and run tasks

        marcink
    
Fixed whoosh daemon, for depracated walk method

              r947
            
        marcink
    
fixes to #92, updated changelog

              r885
            
          :created_on: Jan 26, 2010

          :author: marcink

        marcink
    
Fixed whoosh daemon, for depracated walk method

              r947
            
          :copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>

        marcink
    
fixes to #92, updated changelog

              r885
            
          :license: GPLv3, see COPYING for more details.

      """

        marcink
    
renamed project to rhodecode

              r547
            
      # This program is free software; you can redistribute it and/or

      # modify it under the terms of the GNU General Public License

      # as published by the Free Software Foundation; version 2

      # of the License or (at your option) any later version of the license.

        marcink
    
Fixed whoosh daemon, for depracated walk method

              r947
            
      #

        marcink
    
renamed project to rhodecode

              r547
            
      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

        marcink
    
Fixed whoosh daemon, for depracated walk method

              r947
            
      #

        marcink
    
renamed project to rhodecode

              r547
            
      # You should have received a copy of the GNU General Public License

      # along with this program; if not, write to the Free Software

      # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,

      # MA  02110-1301, USA.

      import sys

      import os

        marcink
    
fixes to #92, updated changelog

              r885
            
      import traceback

        marcink
    
renamed project to rhodecode

              r547
            
      from os.path import dirname as dn

      from os.path import join as jn

      #to get the rhodecode import

      project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))

      sys.path.append(project_path)

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
        marcink
    
Refactor codes for scm model...

              r691
            
      from rhodecode.model.scm import ScmModel

        marcink
    
renamed project to rhodecode

              r547
            
      from rhodecode.lib.helpers import safe_unicode

      from whoosh.index import create_in, open_dir

      from shutil import rmtree

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
      from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME

        marcink
    
renamed project to rhodecode

              r547
            
        marcink
    
rewrote whoosh indexing to run internal repository.walk() instead of filesystem....

              r560
            
      from time import mktime

        marcink
    
fixed whoosh failure on new repository...

              r567
            
      from vcs.exceptions import ChangesetError, RepositoryError

        marcink
    
rewrote whoosh indexing to run internal repository.walk() instead of filesystem....

              r560
            
        marcink
    
renamed project to rhodecode

              r547
            
      import logging

      # Dedicated logger for the indexer; it does not propagate to the root
      # logger, so its records are only emitted by the handler attached below.
      log = logging.getLogger('whooshIndexer')
      log.propagate = False
      log.setLevel(logging.DEBUG)

      # Record layout: timestamp - logger name - level - message
      formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

      # Console (stream) handler that lets everything from DEBUG up through
      ch = logging.StreamHandler()
      ch.setFormatter(formatter)
      ch.setLevel(logging.DEBUG)

      # Attach the console handler to the indexer logger
      log.addHandler(ch)

      class WhooshIndexingDaemon(object):
          """
          Daemon building and incrementally updating the Whoosh index

          Repositories are discovered below ``repo_location`` through
          ``ScmModel.repo_scan``; every file yielded by walking a repository's
          ``tip`` changeset becomes one document in the index stored in
          ``index_location``.
          """

          def __init__(self, indexname='HG_INDEX', index_location=None,
                       repo_location=None, sa=None, repo_list=None):
              """Store the configuration and collect the repositories to index

              :param indexname: index name used when the index is opened for
                  an incremental update
              :param index_location: directory of the index (required)
              :param repo_location: directory scanned for repositories
                  (required)
              :param sa: handed over to ``ScmModel`` (presumably a database
                  session - confirm against ScmModel)
              :param repo_list: optional collection of repository names; when
                  given only the scanned repositories named in it are kept
              :raises Exception: when ``index_location`` or ``repo_location``
                  is missing
              """
              self.indexname = indexname

              self.index_location = index_location
              if not index_location:
                  raise Exception('You have to provide index location')

              self.repo_location = repo_location
              if not repo_location:
                  raise Exception('You have to provide repositories location')

              self.repo_paths = ScmModel(sa).repo_scan(self.repo_location, None)

              if repo_list:
                  # narrow the scanned repositories down to the requested ones
                  filtered_repo_paths = {}
                  for repo_name, repo in self.repo_paths.items():
                      if repo_name in repo_list:
                          filtered_repo_paths[repo.name] = repo

                  self.repo_paths = filtered_repo_paths

              # a missing index directory means there is nothing to update
              # incrementally, so run() has to fall back to a full build
              self.initial = False
              if not os.path.isdir(self.index_location):
                  os.makedirs(self.index_location)
                  log.info('Cannot run incremental index since it does not'
                           ' yet exist running full build')
                  self.initial = True

          def get_paths(self, repo):
              """Return a set with the paths of all files in the tip of ``repo``

              Each path is ``repo.path`` joined with the path of a file node
              yielded by the ``walk('/')`` of the tip changeset. When the
              repository raises ``RepositoryError`` the traceback is logged
              at debug level and the paths gathered so far (possibly none)
              are returned.

              :param repo: vcs repository instance
              """
              index_paths_ = set()
              try:
                  tip = repo.get_changeset('tip')
                  # every yielded file list is collected exactly once;
                  # adding it again per sub-directory could never contribute
                  # a new element to the set
                  for _topnode, _dirs, files in tip.walk('/'):
                      for f in files:
                          index_paths_.add(jn(repo.path, f.path))
              except RepositoryError:
                  # best effort: a repository that cannot be read must not
                  # abort indexing of the remaining repositories
                  log.debug(traceback.format_exc())
              return index_paths_

          def get_node(self, repo, path):
              """Return the node of ``repo`` for the given indexed ``path``

              :param repo: vcs repository instance
              :param path: path as produced by :meth:`get_paths`, i.e.
                  prefixed with ``repo.path`` and one separator
              """
              # strip the "<repo.path><separator>" prefix to get the path
              # relative to the repository root
              n_path = path[len(repo.path) + 1:]
              return repo.get_changeset().get_node(n_path)

          def get_node_mtime(self, node):
              """Return the date of the last changeset of ``node`` as a
              ``time.mktime`` timestamp"""
              return mktime(node.last_changeset.date.timetuple())

          def add_doc(self, writer, path, repo):
              """Adding doc to writer this function itself fetches data from
              the instance of vcs backend

              The file content is stored only for non binary files with an
              extension listed in ``INDEX_EXTENSIONS`` and only when it is
              available as unicode; in every other case the document is
              added with empty content.

              :param writer: index writer receiving the document
              :param path: path as produced by :meth:`get_paths`
              :param repo: vcs repository instance the path belongs to
              """
              node = self.get_node(repo, path)

              #we just index the content of chosen files, and skip binary files
              if node.extension in INDEX_EXTENSIONS and not node.is_binary:

                  u_content = node.content
                  if not isinstance(u_content, unicode):
                      log.warning('  >> %s Could not get this content as unicode '
                                'replacing with empty content', path)
                      u_content = u''
                  else:
                      log.debug('    >> %s [WITH CONTENT]' % path)

              else:
                  log.debug('    >> %s' % path)
                  #just index file name without it's content
                  u_content = u''

              writer.add_document(owner=unicode(repo.contact),
                              repository=safe_unicode(repo.name),
                              path=safe_unicode(path),
                              content=u_content,
                              modtime=self.get_node_mtime(node),
                              extension=node.extension)

          def build_index(self):
              """Drop any existing index and build a new one from scratch

              Adds one document per path returned by :meth:`get_paths` for
              every repository in ``self.repo_paths`` and commits the writer.
              """
              if os.path.exists(self.index_location):
                  log.debug('removing previous index')
                  rmtree(self.index_location)

              if not os.path.exists(self.index_location):
                  os.mkdir(self.index_location)

              # NOTE(review): the index is created under IDX_NAME while
              # update_index() opens it under self.indexname - the two only
              # agree when the caller keeps them equal; confirm before
              # passing a custom indexname
              idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
              writer = idx.writer()

              for repo in self.repo_paths.values():
                  log.debug('building index @ %s' % repo.path)

                  for idx_path in self.get_paths(repo):
                      self.add_doc(writer, idx_path, repo)

              log.debug('>> COMMITING CHANGES <<')
              writer.commit(merge=True)
              log.debug('>>> FINISHED BUILDING INDEX <<<')

          def _scan_indexed_documents(self, reader, writer):
              """Compare the stored documents with the repositories

              Documents whose file is gone (``ChangesetError`` on lookup) or
              whose file changed after it was indexed are deleted through
              ``writer``. Documents of repositories that are not part of
              ``self.repo_paths`` are left untouched.

              :param reader: index reader providing ``all_stored_fields()``
              :param writer: index writer used for the deletions
              :returns: tuple ``(indexed_paths, to_index)`` - the set of all
                  paths found in the index and the set of paths that have to
                  be indexed again
              """
              # The set of all paths in the index
              indexed_paths = set()
              # The set of all paths we need to re-index
              to_index = set()

              # Loop over the stored fields in the index
              for fields in reader.all_stored_fields():
                  indexed_path = fields['path']
                  indexed_paths.add(indexed_path)

                  try:
                      repo = self.repo_paths[fields['repository']]
                  except KeyError:
                      # repository is outside of the current set (e.g. it was
                      # filtered out by repo_list); keep its documents as
                      # they are instead of aborting the whole update
                      continue

                  try:
                      node = self.get_node(repo, indexed_path)
                  except ChangesetError:
                      # This file was deleted since it was indexed
                      log.debug('removing from index %s' % indexed_path)
                      writer.delete_by_term('path', indexed_path)

                  else:
                      # Check if this file was changed since it was indexed
                      indexed_time = fields['modtime']
                      mtime = self.get_node_mtime(node)
                      if mtime > indexed_time:
                          # The file has changed, delete it and add it to the
                          # list of files to reindex
                          log.debug('adding to reindex list %s' % indexed_path)
                          writer.delete_by_term('path', indexed_path)
                          to_index.add(indexed_path)

              return indexed_paths, to_index

          def update_index(self):
              """Incrementally bring the existing index up to date

              Removes documents of deleted files, re-indexes changed files
              and adds files that are not in the index yet, then commits.
              """
              log.debug('STARTING INCREMENTAL INDEXING UPDATE')

              idx = open_dir(self.index_location, indexname=self.indexname)

              reader = idx.reader()
              writer = idx.writer()

              try:
                  indexed_paths, to_index = self._scan_indexed_documents(reader,
                                                                         writer)
              finally:
                  # the reader is only needed for the scan above
                  reader.close()

              # Loop over the files of every repository and index those that
              # changed or were never indexed
              for repo in self.repo_paths.values():
                  for path in self.get_paths(repo):
                      if path in to_index or path not in indexed_paths:
                          # This is either a file that's changed, or a new file
                          # that wasn't indexed before. So index it!
                          self.add_doc(writer, path, repo)
                          log.debug('re indexing %s' % path)

              log.debug('>> COMMITING CHANGES <<')
              writer.commit(merge=True)
              log.debug('>>> FINISHED REBUILDING INDEX <<<')

          def run(self, full_index=False):
              """Run daemon

              :param full_index: force a full rebuild; a full build also
                  happens when the index directory did not exist on start
              """
              if full_index or self.initial:
                  self.build_index()
              else:
                  self.update_index()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

marcink fixes to #92, updated changelog	r885	# -- coding: utf-8 --
		"""
		rhodecode.lib.indexers.daemon
		~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

		A daemon will read from task table and run tasks
marcink Fixed whoosh daemon, for depracated walk method	r947
marcink fixes to #92, updated changelog	r885	:created_on: Jan 26, 2010
		:author: marcink
marcink Fixed whoosh daemon, for depracated walk method	r947	:copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>
marcink fixes to #92, updated changelog	r885	:license: GPLv3, see COPYING for more details.
		"""
marcink renamed project to rhodecode	r547	# This program is free software; you can redistribute it and/or
		# modify it under the terms of the GNU General Public License
		# as published by the Free Software Foundation; version 2
		# of the License or (at your option) any later version of the license.
marcink Fixed whoosh daemon, for depracated walk method	r947	#
marcink renamed project to rhodecode	r547	# This program is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		# GNU General Public License for more details.
marcink Fixed whoosh daemon, for depracated walk method	r947	#
marcink renamed project to rhodecode	r547	# You should have received a copy of the GNU General Public License
		# along with this program; if not, write to the Free Software
		# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
		# MA 02110-1301, USA.

		import sys
		import os
marcink fixes to #92, updated changelog	r885	import traceback
marcink renamed project to rhodecode	r547	from os.path import dirname as dn
		from os.path import join as jn

		#to get the rhodecode import
		project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
		sys.path.append(project_path)

marcink Hacking for git support,and new faster repo scan	r631
marcink Refactor codes for scm model...	r691	from rhodecode.model.scm import ScmModel
marcink renamed project to rhodecode	r547	from rhodecode.lib.helpers import safe_unicode
		from whoosh.index import create_in, open_dir
		from shutil import rmtree
marcink Hacking for git support,and new faster repo scan	r631	from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME
marcink renamed project to rhodecode	r547
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560	from time import mktime
marcink fixed whoosh failure on new repository...	r567	from vcs.exceptions import ChangesetError, RepositoryError
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560
marcink renamed project to rhodecode	r547	import logging

		log = logging.getLogger('whooshIndexer')
		# create logger
		log.setLevel(logging.DEBUG)
		log.propagate = False
		# create console handler and set level to debug
		ch = logging.StreamHandler()
		ch.setLevel(logging.DEBUG)

		# create formatter
		formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

		# add formatter to ch
		ch.setFormatter(formatter)

		# add ch to logger
		log.addHandler(ch)

		class WhooshIndexingDaemon(object):
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560	"""
		Daemon for atomic jobs
		"""
marcink renamed project to rhodecode	r547
marcink Hacking for git support,and new faster repo scan	r631	def __init__(self, indexname='HG_INDEX', index_location=None,
marcink fixes #90 + docs update	r894	repo_location=None, sa=None, repo_list=None):
marcink renamed project to rhodecode	r547	self.indexname = indexname
marcink Hacking for git support,and new faster repo scan	r631
		self.index_location = index_location
		if not index_location:
		raise Exception('You have to provide index location')

marcink renamed project to rhodecode	r547	self.repo_location = repo_location
marcink Hacking for git support,and new faster repo scan	r631	if not repo_location:
		raise Exception('You have to provide repositories location')

marcink fixed cache problem,...	r777	self.repo_paths = ScmModel(sa).repo_scan(self.repo_location, None)
marcink fixes #90 + docs update	r894
		if repo_list:
		filtered_repo_paths = {}
		for repo_name, repo in self.repo_paths.items():
		if repo_name in repo_list:
		filtered_repo_paths[repo.name] = repo

		self.repo_paths = filtered_repo_paths


marcink renamed project to rhodecode	r547	self.initial = False
marcink Hacking for git support,and new faster repo scan	r631	if not os.path.isdir(self.index_location):
marcink extended trending languages to more entries, implemented new faster and "fancy"...	r763	os.makedirs(self.index_location)
marcink renamed project to rhodecode	r547	log.info('Cannot run incremental index since it does not'
		' yet exist running full build')
		self.initial = True
marcink Hacking for git support,and new faster repo scan	r631
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	def get_paths(self, repo):
marcink Implemented whoosh index building as paster command....	r683	"""recursive walk in root dir and return a set of all path in that dir
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560	based on repository walk function
		"""
marcink renamed project to rhodecode	r547	index_paths_ = set()
marcink fixed whoosh failure on new repository...	r567	try:
marcink Fixed whoosh daemon, for depracated walk method	r947	tip = repo.get_changeset('tip')
		for topnode, dirs, files in tip.walk('/'):
marcink renamed project to rhodecode	r547	for f in files:
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	index_paths_.add(jn(repo.path, f.path))
marcink fixed whoosh failure on new repository...	r567	for dir in dirs:
		for f in files:
		index_paths_.add(jn(repo.path, f.path))
marcink Hacking for git support,and new faster repo scan	r631
marcink fixes to #92, updated changelog	r885	except RepositoryError, e:
		log.debug(traceback.format_exc())
marcink fixed whoosh failure on new repository...	r567	pass
marcink Hacking for git support,and new faster repo scan	r631	return index_paths_

marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	def get_node(self, repo, path):
		n_path = path[len(repo.path) + 1:]
		node = repo.get_changeset().get_node(n_path)
		return node
marcink Hacking for git support,and new faster repo scan	r631
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	def get_node_mtime(self, node):
		return mktime(node.last_changeset.date.timetuple())
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	def add_doc(self, writer, path, repo):
marcink Implemented whoosh index building as paster command....	r683	"""Adding doc to writer this function itself fetches data from
		the instance of vcs backend"""
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	node = self.get_node(repo, path)
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560
marcink #92 removed content of binary files for whoosh indexer	r886	#we just index the content of chosen files, and skip binary files
		if node.extension in INDEX_EXTENSIONS and not node.is_binary:
marcink fixes to #92, updated changelog	r885
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560	u_content = node.content
marcink fixes to #92, updated changelog	r885	if not isinstance(u_content, unicode):
		log.warning(' >> %s Could not get this content as unicode '
		'replacing with empty content', path)
		u_content = u''
		else:
		log.debug(' >> %s [WITH CONTENT]' % path)

marcink renamed project to rhodecode	r547	else:
		log.debug(' >> %s' % path)
		#just index file name without it's content
		u_content = u''
marcink Hacking for git support,and new faster repo scan	r631
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560	writer.add_document(owner=unicode(repo.contact),
		repository=safe_unicode(repo.name),
		path=safe_unicode(path),
		content=u_content,
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	modtime=self.get_node_mtime(node),
marcink Hacking for git support,and new faster repo scan	r631	extension=node.extension)

marcink renamed project to rhodecode	r547
		def build_index(self):
marcink Hacking for git support,and new faster repo scan	r631	if os.path.exists(self.index_location):
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560	log.debug('removing previous index')
marcink Hacking for git support,and new faster repo scan	r631	rmtree(self.index_location)

		if not os.path.exists(self.index_location):
		os.mkdir(self.index_location)

		idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
marcink renamed project to rhodecode	r547	writer = idx.writer()
marcink fixes #90 + docs update	r894
		for repo in self.repo_paths.values():
marcink renamed project to rhodecode	r547	log.debug('building index @ %s' % repo.path)
marcink Hacking for git support,and new faster repo scan	r631
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	for idx_path in self.get_paths(repo):
marcink renamed project to rhodecode	r547	self.add_doc(writer, idx_path, repo)
marcink Hacking for git support,and new faster repo scan	r631
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	log.debug('>> COMMITING CHANGES <<')
marcink renamed project to rhodecode	r547	writer.commit(merge=True)
		log.debug('>>> FINISHED BUILDING INDEX <<<')
marcink Hacking for git support,and new faster repo scan	r631

marcink renamed project to rhodecode	r547	def update_index(self):
		log.debug('STARTING INCREMENTAL INDEXING UPDATE')
marcink Hacking for git support,and new faster repo scan	r631
		idx = open_dir(self.index_location, indexname=self.indexname)
marcink renamed project to rhodecode	r547	# The set of all paths in the index
		indexed_paths = set()
		# The set of all paths we need to re-index
		to_index = set()
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	reader = idx.reader()
		writer = idx.writer()
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	# Loop over the stored fields in the index
		for fields in reader.all_stored_fields():
		indexed_path = fields['path']
		indexed_paths.add(indexed_path)
marcink Hacking for git support,and new faster repo scan	r631
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	repo = self.repo_paths[fields['repository']]
marcink Hacking for git support,and new faster repo scan	r631
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	try:
		node = self.get_node(repo, indexed_path)
		except ChangesetError:
marcink renamed project to rhodecode	r547	# This file was deleted since it was indexed
		log.debug('removing from index %s' % indexed_path)
		writer.delete_by_term('path', indexed_path)
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	else:
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	# Check if this file was changed since it was indexed
marcink renamed project to rhodecode	r547	indexed_time = fields['modtime']
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	mtime = self.get_node_mtime(node)
marcink renamed project to rhodecode	r547	if mtime > indexed_time:
		# The file has changed, delete it and add it to the list of
		# files to reindex
		log.debug('adding to reindex list %s' % indexed_path)
		writer.delete_by_term('path', indexed_path)
		to_index.add(indexed_path)
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	# Loop over the files in the filesystem
		# Assume we have a function that gathers the filenames of the
		# documents to be indexed
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	for repo in self.repo_paths.values():
		for path in self.get_paths(repo):
marcink renamed project to rhodecode	r547	if path in to_index or path not in indexed_paths:
		# This is either a file that's changed, or a new file
		# that wasn't indexed before. So index it!
		self.add_doc(writer, path, repo)
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	log.debug('re indexing %s' % path)
marcink Hacking for git support,and new faster repo scan	r631
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	log.debug('>> COMMITING CHANGES <<')
marcink renamed project to rhodecode	r547	writer.commit(merge=True)
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	log.debug('>>> FINISHED REBUILDING INDEX <<<')
marcink Hacking for git support,and new faster repo scan	r631
marcink renamed project to rhodecode	r547	def run(self, full_index=False):
		"""Run daemon"""
		if full_index or self.initial:
		self.build_index()
		else:
		self.update_index()