upstream/kallithea Files · rhodecode/lib/indexers/daemon.py

implements rewrote diffs to enable displaying full diff on each file...

implements rewrote diffs to enable displaying full diff on each file - fixed escaping of html special chars in file editor

marcink - - Load All Authors

File last commit:

r1711:b369bec5 beta


                r1789:17caf4ef

beta

Download file

             daemon.py
        
                    238 lines
            
             | 8.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / indexers / daemon.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        marcink
    
fixes to #92, updated changelog

              r885
            
      # -*- coding: utf-8 -*-

      """

          rhodecode.lib.indexers.daemon

          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        marcink
    
fixed daemon typos

              r1377
            
          A daemon will read from task table and run tasks

        marcink
    
Fixed whoosh daemon, for depracated walk method

              r947
            
        marcink
    
fixes to #92, updated changelog

              r885
            
          :created_on: Jan 26, 2010

          :author: marcink

        marcink
    
Fixed whoosh daemon, for depracated walk method

              r947
            
          :copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>

        marcink
    
fixes to #92, updated changelog

              r885
            
          :license: GPLv3, see COPYING for more details.

      """

        marcink
    
fixed license  issue #149

              r1206
            
      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU General Public License as published by

      # the Free Software Foundation, either version 3 of the License, or

      # (at your option) any later version.

        marcink
    
Fixed whoosh daemon, for depracated walk method

              r947
            
      #

        marcink
    
renamed project to rhodecode

              r547
            
      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

        marcink
    
Fixed whoosh daemon, for depracated walk method

              r947
            
      #

        marcink
    
renamed project to rhodecode

              r547
            
      # You should have received a copy of the GNU General Public License

        marcink
    
fixed license  issue #149

              r1206
            
      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

        marcink
    
renamed project to rhodecode

              r547
            
        marcink
    
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function

              r1154
            
      import os

        marcink
    
renamed project to rhodecode

              r547
            
      import sys

        marcink
    
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function

              r1154
            
      import logging

        marcink
    
fixes to #92, updated changelog

              r885
            
      import traceback

        marcink
    
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function

              r1154
            
      from shutil import rmtree

      from time import mktime

        marcink
    
renamed project to rhodecode

              r547
            
      from os.path import dirname as dn

      from os.path import join as jn

      # add the project root (four directories above this file) to sys.path
      # so that the rhodecode package can be imported

      project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))

      sys.path.append(project_path)

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
        marcink
    
Refactor codes for scm model...

              r691
            
      from rhodecode.model.scm import ScmModel

        marcink
    
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function

              r1154
            
      from rhodecode.lib import safe_unicode

        marcink
    
Hacking for git support,and new faster repo scan

              r631
            
      from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME

        marcink
    
renamed project to rhodecode

              r547
            
        marcink
    
fixes issue with whoosh reindexing files that were removed or renamed

              r1711
            
      from vcs.exceptions import ChangesetError, RepositoryError, \

          NodeDoesNotExistError

        marcink
    
rewrote whoosh indexing to run internal repository.walk() instead of filesystem....

              r560
            
        marcink
    
simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function

              r1154
            
      from whoosh.index import create_in, open_dir

        marcink
    
renamed project to rhodecode

              r547
            
      # Stand-alone logging for the indexer: a single stream handler
      # (stderr unless told otherwise) at DEBUG level, with records kept away
      # from ancestor loggers.
      formatter = logging.Formatter("%(asctime)s - %(name)s -"
                                    " %(levelname)s - %(message)s")

      # handler: level and layout
      ch = logging.StreamHandler()
      ch.setLevel(logging.DEBUG)
      ch.setFormatter(formatter)

      # logger: level, no propagation, and the handler built above
      log = logging.getLogger('whooshIndexer')
      log.propagate = False
      log.setLevel(logging.DEBUG)
      log.addHandler(ch)

      class WhooshIndexingDaemon(object):
          """
          Builds and incrementally updates the Whoosh index of the files found
          at the ``tip`` changeset of every scanned repository.

          ``run()`` is the entry point: it does a full rebuild when asked to,
          or when the constructor had to create the index directory, and an
          incremental update otherwise.
          """

          def __init__(self, indexname='HG_INDEX', index_location=None,
                       repo_location=None, sa=None, repo_list=None):
              """
              Scan the repositories and prepare the index directory.

              :param indexname: index name used by ``update_index`` when
                  opening the existing index
              :param index_location: directory that holds the index; it is
                  created when missing, which also forces a full build
              :param repo_location: location handed to
                  ``ScmModel.repo_scan``
              :param sa: passed unchanged to ``ScmModel``
              :param repo_list: optional collection of repository names; when
                  non-empty only the scanned repositories named in it are
                  kept
              :raises Exception: when ``index_location`` or
                  ``repo_location`` is empty
              """
              self.indexname = indexname

              self.index_location = index_location
              if not index_location:
                  raise Exception('You have to provide index location')

              self.repo_location = repo_location
              if not repo_location:
                  raise Exception('You have to provide repositories location')

              self.repo_paths = ScmModel(sa).repo_scan(self.repo_location)

              if repo_list:
                  # restrict the run to the explicitly requested repositories
                  filtered_repo_paths = {}
                  for repo_name, repo in self.repo_paths.items():
                      if repo_name in repo_list:
                          filtered_repo_paths[repo_name] = repo

                  self.repo_paths = filtered_repo_paths

              self.initial = False
              if not os.path.isdir(self.index_location):
                  # nothing to update incrementally yet: remember that the
                  # first run() has to build the index from scratch
                  os.makedirs(self.index_location)
                  log.info('Cannot run incremental index since it does not'
                           ' yet exist running full build')
                  self.initial = True

          def get_paths(self, repo):
              """
              Return the set of paths of all files at the ``tip`` changeset
              of ``repo``, each one joined onto ``repo.path``.

              A ``RepositoryError`` raised while walking is logged at debug
              level and the paths collected so far (possibly none) are
              returned.
              """
              index_paths_ = set()
              try:
                  tip = repo.get_changeset('tip')
                  for topnode, dirs, files in tip.walk('/'):
                      for f in files:
                          index_paths_.add(jn(repo.path, f.path))

              except RepositoryError:
                  log.debug(traceback.format_exc())

              return index_paths_

          def get_node(self, repo, path):
              """
              Return the node for ``path`` from ``repo.get_changeset()``.

              ``path`` is expected in the form produced by ``get_paths``
              (``repo.path`` joined with the in-repository path): the
              ``repo.path`` prefix and the single character following it are
              cut off before the lookup.
              """
              n_path = path[len(repo.path) + 1:]
              return repo.get_changeset().get_node(n_path)

          def get_node_mtime(self, node):
              """
              Return the date of the node's last changeset as a timestamp
              (``time.mktime`` of its time tuple).
              """
              return mktime(node.last_changeset.date.timetuple())

          def add_doc(self, writer, path, repo, repo_name):
              """
              Add one document for ``path`` to ``writer``; the data is
              fetched from the vcs backend instance ``repo``.

              Content is indexed only for non-binary files whose extension is
              in ``INDEX_EXTENSIONS`` and whose content is ``unicode``; every
              other file is indexed with empty content.
              """
              node = self.get_node(repo, path)

              # we just index the content of chosen files, and skip binary
              # files
              if node.extension in INDEX_EXTENSIONS and not node.is_binary:
                  u_content = node.content
                  if not isinstance(u_content, unicode):
                      log.warning('  >> %s Could not get this content as unicode '
                                'replacing with empty content', path)
                      u_content = u''
                  else:
                      log.debug('    >> %s [WITH CONTENT]', path)

              else:
                  log.debug('    >> %s', path)
                  # just index file name without its content
                  u_content = u''

              writer.add_document(owner=unicode(repo.contact),
                                  repository=safe_unicode(repo_name),
                                  path=safe_unicode(path),
                                  content=u_content,
                                  modtime=self.get_node_mtime(node),
                                  extension=node.extension)

          def build_index(self):
              """
              Drop any existing index directory and build a new index holding
              one document per path returned by ``get_paths`` for every
              repository in ``self.repo_paths``.
              """
              if os.path.exists(self.index_location):
                  log.debug('removing previous index')
                  rmtree(self.index_location)

              if not os.path.exists(self.index_location):
                  os.mkdir(self.index_location)

              # NOTE(review): the index is created under IDX_NAME while
              # update_index() opens it under self.indexname - the two only
              # agree when both carry the same value; confirm with callers
              idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
              writer = idx.writer()

              for repo_name, repo in self.repo_paths.items():
                  log.debug('building index @ %s', repo.path)

                  for idx_path in self.get_paths(repo):
                      self.add_doc(writer, idx_path, repo, repo_name)

              log.debug('>> COMMITING CHANGES <<')
              writer.commit(merge=True)
              log.debug('>>> FINISHED BUILDING INDEX <<<')

          def update_index(self):
              """
              Bring the existing index in line with the repositories in
              ``self.repo_paths``.

              Indexed files that can no longer be fetched are removed from
              the index, files whose last-changeset time is newer than the
              stored ``modtime`` are re-indexed, and files not indexed before
              are added. Documents stored for a repository that is not in
              ``self.repo_paths`` are left untouched.
              """
              log.debug('STARTING INCREMENTAL INDEXING UPDATE')

              idx = open_dir(self.index_location, indexname=self.indexname)
              # The set of all paths in the index
              indexed_paths = set()
              # The set of all paths we need to re-index
              to_index = set()

              reader = idx.reader()
              writer = idx.writer()

              # Loop over the stored fields in the index
              for fields in reader.all_stored_fields():
                  indexed_path = fields['path']
                  indexed_paths.add(indexed_path)

                  try:
                      repo = self.repo_paths[fields['repository']]
                  except KeyError:
                      # The document belongs to a repository that is not part
                      # of this run; keep the entry as it is instead of
                      # aborting the whole update
                      log.debug('skipping %s, repository %s is not part of '
                                'this run', indexed_path,
                                fields['repository'])
                      continue

                  try:
                      node = self.get_node(repo, indexed_path)
                  except (ChangesetError, NodeDoesNotExistError):
                      # This file was deleted since it was indexed
                      log.debug('removing from index %s', indexed_path)
                      writer.delete_by_term('path', indexed_path)

                  else:
                      # Check if this file was changed since it was indexed
                      indexed_time = fields['modtime']
                      mtime = self.get_node_mtime(node)
                      if mtime > indexed_time:
                          # The file has changed, delete it and add it to the
                          # list of files to reindex
                          log.debug('adding to reindex list %s', indexed_path)
                          writer.delete_by_term('path', indexed_path)
                          to_index.add(indexed_path)

              # Loop over the files currently present in the repositories
              for repo_name, repo in self.repo_paths.items():
                  for path in self.get_paths(repo):
                      if path in to_index or path not in indexed_paths:
                          # This is either a file that's changed, or a new
                          # file that wasn't indexed before. So index it!
                          self.add_doc(writer, path, repo, repo_name)
                          log.debug('re indexing %s', path)

              log.debug('>> COMMITING CHANGES <<')
              writer.commit(merge=True)
              log.debug('>>> FINISHED REBUILDING INDEX <<<')

          def run(self, full_index=False):
              """
              Run daemon: full build when ``full_index`` is true or the index
              did not exist at construction time, incremental update
              otherwise.
              """
              if full_index or self.initial:
                  self.build_index()
              else:
                  self.update_index()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

marcink fixes to #92, updated changelog	r885	# -- coding: utf-8 --
		"""
		rhodecode.lib.indexers.daemon
		~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

marcink fixed daemon typos	r1377	A daemon will read from task table and run tasks
marcink Fixed whoosh daemon, for depracated walk method	r947
marcink fixes to #92, updated changelog	r885	:created_on: Jan 26, 2010
		:author: marcink
marcink Fixed whoosh daemon, for depracated walk method	r947	:copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>
marcink fixes to #92, updated changelog	r885	:license: GPLv3, see COPYING for more details.
		"""
marcink fixed license issue #149	r1206	# This program is free software: you can redistribute it and/or modify
		# it under the terms of the GNU General Public License as published by
		# the Free Software Foundation, either version 3 of the License, or
		# (at your option) any later version.
marcink Fixed whoosh daemon, for depracated walk method	r947	#
marcink renamed project to rhodecode	r547	# This program is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		# GNU General Public License for more details.
marcink Fixed whoosh daemon, for depracated walk method	r947	#
marcink renamed project to rhodecode	r547	# You should have received a copy of the GNU General Public License
marcink fixed license issue #149	r1206	# along with this program. If not, see <http://www.gnu.org/licenses/>.
marcink renamed project to rhodecode	r547
marcink simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function	r1154	import os
marcink renamed project to rhodecode	r547	import sys
marcink simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function	r1154	import logging
marcink fixes to #92, updated changelog	r885	import traceback
marcink simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function	r1154
		from shutil import rmtree
		from time import mktime

marcink renamed project to rhodecode	r547	from os.path import dirname as dn
		from os.path import join as jn

		#to get the rhodecode import
		project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
		sys.path.append(project_path)

marcink Hacking for git support,and new faster repo scan	r631
marcink Refactor codes for scm model...	r691	from rhodecode.model.scm import ScmModel
marcink simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function	r1154	from rhodecode.lib import safe_unicode
marcink Hacking for git support,and new faster repo scan	r631	from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME
marcink renamed project to rhodecode	r547
marcink fixes issue with whoosh reindexing files that were removed or renamed	r1711	from vcs.exceptions import ChangesetError, RepositoryError, \
		NodeDoesNotExistError
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560
marcink simplified str2bool, and moved safe_unicode out of helpers since it was not html specific function	r1154	from whoosh.index import create_in, open_dir


marcink renamed project to rhodecode	r547
		log = logging.getLogger('whooshIndexer')
		# create logger
		log.setLevel(logging.DEBUG)
		log.propagate = False
		# create console handler and set level to debug
		ch = logging.StreamHandler()
		ch.setLevel(logging.DEBUG)

		# create formatter
marcink fixes issue #146	r1183	formatter = logging.Formatter("%(asctime)s - %(name)s -"
		" %(levelname)s - %(message)s")
marcink renamed project to rhodecode	r547
		# add formatter to ch
		ch.setFormatter(formatter)

		# add ch to logger
		log.addHandler(ch)

		class WhooshIndexingDaemon(object):
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560	"""
marcink fixed daemon typos	r1377	Daemon for atomic jobs
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560	"""
marcink renamed project to rhodecode	r547
marcink Hacking for git support,and new faster repo scan	r631	def __init__(self, indexname='HG_INDEX', index_location=None,
marcink fixes #90 + docs update	r894	repo_location=None, sa=None, repo_list=None):
marcink renamed project to rhodecode	r547	self.indexname = indexname
marcink Hacking for git support,and new faster repo scan	r631
		self.index_location = index_location
		if not index_location:
		raise Exception('You have to provide index location')

marcink renamed project to rhodecode	r547	self.repo_location = repo_location
marcink Hacking for git support,and new faster repo scan	r631	if not repo_location:
		raise Exception('You have to provide repositories location')

marcink Major refactoring, removed when possible calls to app globals....	r1036	self.repo_paths = ScmModel(sa).repo_scan(self.repo_location)
marcink fixes #90 + docs update	r894
		if repo_list:
		filtered_repo_paths = {}
		for repo_name, repo in self.repo_paths.items():
		if repo_name in repo_list:
marcink changes for #56	r1171	filtered_repo_paths[repo_name] = repo
marcink fixes #90 + docs update	r894
		self.repo_paths = filtered_repo_paths


marcink renamed project to rhodecode	r547	self.initial = False
marcink Hacking for git support,and new faster repo scan	r631	if not os.path.isdir(self.index_location):
marcink extended trending languages to more entries, implemented new faster and "fancy"...	r763	os.makedirs(self.index_location)
marcink renamed project to rhodecode	r547	log.info('Cannot run incremental index since it does not'
		' yet exist running full build')
		self.initial = True
marcink Hacking for git support,and new faster repo scan	r631
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	def get_paths(self, repo):
marcink Implemented whoosh index building as paster command....	r683	"""recursive walk in root dir and return a set of all path in that dir
marcink rewrote whoosh indexing to run internal repository.walk() instead of filesystem....	r560	based on repository walk function
		"""
marcink renamed project to rhodecode	r547	index_paths_ = set()
marcink fixed whoosh failure on new repository...	r567	try:
marcink Fixed whoosh daemon, for depracated walk method	r947	tip = repo.get_changeset('tip')
		for topnode, dirs, files in tip.walk('/'):
marcink renamed project to rhodecode	r547	for f in files:
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	index_paths_.add(jn(repo.path, f.path))
marcink Hacking for git support,and new faster repo scan	r631
marcink fixes to #92, updated changelog	r885	except RepositoryError, e:
		log.debug(traceback.format_exc())
marcink fixed whoosh failure on new repository...	r567	pass
marcink Hacking for git support,and new faster repo scan	r631	return index_paths_

marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	def get_node(self, repo, path):
		n_path = path[len(repo.path) + 1:]
		node = repo.get_changeset().get_node(n_path)
		return node
marcink Hacking for git support,and new faster repo scan	r631
marcink fixed reindexing, and made some optimizations to reuse repo instances from repo scann list.	r561	def get_node_mtime(self, node):
		return mktime(node.last_changeset.date.timetuple())
marcink Hacking for git support,and new faster repo scan	r631
def add_doc(self, writer, path, repo, repo_name):
    """Add one document describing `path` to `writer`.

    The node is fetched from the vcs backend through ``self.get_node``.
    Content is indexed only for non-binary files whose extension is listed
    in ``INDEX_EXTENSIONS``; every other file is indexed by name only, with
    empty content.

    :param writer: index writer exposing ``add_document``
    :param path: path of the file, prefixed with ``repo.path``
    :param repo: vcs repository instance the file belongs to
    :param repo_name: repository name stored with the document
    """
    node = self.get_node(repo, path)

    # we just index the content of chosen files, and skip binary files
    if node.extension in INDEX_EXTENSIONS and not node.is_binary:
        u_content = node.content
        if not isinstance(u_content, unicode):
            log.warning(' >> %s Could not get this content as unicode '
                        'replacing with empty content', path)
            u_content = u''
        else:
            log.debug(' >> %s [WITH CONTENT]' % path)
    else:
        log.debug(' >> %s' % path)
        # just index the file name without its content
        u_content = u''

    # The owner goes through safe_unicode like the other text fields: a bare
    # unicode() call raises UnicodeDecodeError on a non-ASCII byte string.
    # NOTE(review): assumes safe_unicode tolerates such input - confirm.
    writer.add_document(owner=safe_unicode(repo.contact),
                        repository=safe_unicode(repo_name),
                        path=safe_unicode(path),
                        content=u_content,
                        modtime=self.get_node_mtime(node),
                        extension=node.extension)

def build_index(self):
    """Build the whole index from scratch.

    Any index already present at ``self.index_location`` is removed and the
    directory is re-created. Every path returned by ``self.get_paths`` for
    every repository in ``self.repo_paths`` is then added through
    ``self.add_doc``, followed by a single commit.
    """
    if os.path.exists(self.index_location):
        log.debug('removing previous index')
        rmtree(self.index_location)

    if not os.path.exists(self.index_location):
        # makedirs instead of mkdir, so a missing parent directory does not
        # abort the build with OSError
        os.makedirs(self.index_location)

    idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
    writer = idx.writer()

    for repo_name, repo in self.repo_paths.items():
        log.debug('building index @ %s' % repo.path)

        for idx_path in self.get_paths(repo):
            self.add_doc(writer, idx_path, repo, repo_name)

    log.debug('>> COMMITING CHANGES <<')
    writer.commit(merge=True)
    log.debug('>>> FINISHED BUILDING INDEX <<<')
marcink Hacking for git support,and new faster repo scan	r631

def update_index(self):
    """Incrementally update the existing index.

    First pass: walk every stored document. A document is deleted when its
    repository is no longer in ``self.repo_paths`` or when its node can no
    longer be fetched. A document whose node is newer than the stored
    ``modtime`` is deleted and queued for re-indexing.

    Second pass: walk every path of every repository in ``self.repo_paths``
    and add the queued paths plus any path not seen in the index. All
    changes are committed once at the end.
    """
    log.debug('STARTING INCREMENTAL INDEXING UPDATE')

    idx = open_dir(self.index_location, indexname=self.indexname)
    # The set of all paths in the index
    indexed_paths = set()
    # The set of all paths we need to re-index
    to_index = set()

    reader = idx.reader()
    writer = idx.writer()

    # Loop over the stored fields in the index
    for fields in reader.all_stored_fields():
        indexed_path = fields['path']
        indexed_repo_name = fields['repository']

        try:
            repo = self.repo_paths[indexed_repo_name]
        except KeyError:
            # The repository of this document is not among the current
            # repositories any more; drop the stale document instead of
            # aborting the whole update. The path is deliberately not
            # recorded in indexed_paths, so it is indexed again if a
            # current repository still yields it.
            log.debug('removing from index %s' % indexed_path)
            writer.delete_by_term('path', indexed_path)
            continue

        indexed_paths.add(indexed_path)

        try:
            node = self.get_node(repo, indexed_path)
        except (ChangesetError, NodeDoesNotExistError):
            # This file was deleted since it was indexed
            log.debug('removing from index %s' % indexed_path)
            writer.delete_by_term('path', indexed_path)
        else:
            # Check if this file was changed since it was indexed
            indexed_time = fields['modtime']
            mtime = self.get_node_mtime(node)
            if mtime > indexed_time:
                # The file has changed, delete it and add it to the list
                # of files to reindex
                log.debug('adding to reindex list %s' % indexed_path)
                writer.delete_by_term('path', indexed_path)
                to_index.add(indexed_path)

    # Loop over the paths currently present in every repository
    for repo_name, repo in self.repo_paths.items():
        for path in self.get_paths(repo):
            if path in to_index or path not in indexed_paths:
                # This is either a file that's changed, or a new file
                # that wasn't indexed before. So index it!
                self.add_doc(writer, path, repo, repo_name)
                log.debug('re indexing %s' % path)

    log.debug('>> COMMITING CHANGES <<')
    writer.commit(merge=True)
    log.debug('>>> FINISHED REBUILDING INDEX <<<')
marcink Hacking for git support,and new faster repo scan	r631
def run(self, full_index=False):
    """Run daemon

    A full rebuild (``self.build_index``) is done when `full_index` is true
    or ``self.initial`` is set; otherwise the existing index is updated
    with ``self.update_index``.
    """
    rebuild_everything = full_index or self.initial
    if not rebuild_everything:
        self.update_index()
        return
    self.build_index()