##// END OF EJS Templates
Rewrote whoosh indexing to run the internal repository.walk() instead of a filesystem walk (os.walk).
marcink -
r560:3072935b default
parent child Browse files
Show More
@@ -115,6 +115,7 b' class DbManage(object):'
115 hooks1.ui_section = 'hooks'
115 hooks1.ui_section = 'hooks'
116 hooks1.ui_key = 'changegroup.update'
116 hooks1.ui_key = 'changegroup.update'
117 hooks1.ui_value = 'hg update >&2'
117 hooks1.ui_value = 'hg update >&2'
118 hooks1.ui_active = False
118
119
119 hooks2 = RhodeCodeUi()
120 hooks2 = RhodeCodeUi()
120 hooks2.ui_section = 'hooks'
121 hooks2.ui_section = 'hooks'
@@ -39,6 +39,9 b' from whoosh.index import create_in, open'
39 from shutil import rmtree
39 from shutil import rmtree
40 from rhodecode.lib.indexers import INDEX_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME
40 from rhodecode.lib.indexers import INDEX_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME
41
41
42 from time import mktime
43 from vcs.backends import hg
44
42 import logging
45 import logging
43
46
44 log = logging.getLogger('whooshIndexer')
47 log = logging.getLogger('whooshIndexer')
@@ -62,7 +65,9 b' def scan_paths(root_location):'
62 return HgModel.repo_scan('/', root_location, None, True)
65 return HgModel.repo_scan('/', root_location, None, True)
63
66
64 class WhooshIndexingDaemon(object):
67 class WhooshIndexingDaemon(object):
65 """Deamon for atomic jobs"""
68 """
69 Deamon for atomic jobs
70 """
66
71
67 def __init__(self, indexname='HG_INDEX', repo_location=None):
72 def __init__(self, indexname='HG_INDEX', repo_location=None):
68 self.indexname = indexname
73 self.indexname = indexname
@@ -73,55 +78,49 b' class WhooshIndexingDaemon(object):'
73 log.info('Cannot run incremental index since it does not'
78 log.info('Cannot run incremental index since it does not'
74 ' yet exist running full build')
79 ' yet exist running full build')
75 self.initial = True
80 self.initial = True
76
81
77 def get_paths(self, root_dir):
82 def get_paths(self, root_dir):
78 """recursive walk in root dir and return a set of all path in that dir
83 """
79 excluding files in .hg dir"""
84 recursive walk in root dir and return a set of all path in that dir
85 based on repository walk function
86 """
87 repo = hg.MercurialRepository(root_dir)
80 index_paths_ = set()
88 index_paths_ = set()
81 for path, dirs, files in os.walk(root_dir):
89 for topnode, dirs, files in repo.walk('/', 'tip'):
82 if path.find('.hg') == -1:
90 for f in files:
91 index_paths_.add(jn(root_dir, f.path))
92 for dir in dirs:
83 for f in files:
93 for f in files:
84 index_paths_.add(jn(path, f))
94 index_paths_.add(jn(root_dir, f.path))
85
95
86 return index_paths_
96 return index_paths_
87
97
98
88 def add_doc(self, writer, path, repo):
99 def add_doc(self, writer, path, repo):
89 """Adding doc to writer"""
100 """Adding doc to writer"""
90
101 n_path = path[len(repo.path) + 1:]
91 ext = unicode(path.split('/')[-1].split('.')[-1].lower())
102 node = repo.get_changeset().get_node(n_path)
92 #we just index the content of choosen files
103
93 if ext in INDEX_EXTENSIONS:
104 #we just index the content of chosen files
105 if node.extension in INDEX_EXTENSIONS:
94 log.debug(' >> %s [WITH CONTENT]' % path)
106 log.debug(' >> %s [WITH CONTENT]' % path)
95 fobj = open(path, 'rb')
107 u_content = node.content
96 content = fobj.read()
97 fobj.close()
98 u_content = safe_unicode(content)
99 else:
108 else:
100 log.debug(' >> %s' % path)
109 log.debug(' >> %s' % path)
101 #just index file name without it's content
110 #just index file name without it's content
102 u_content = u''
111 u_content = u''
103
112
104
113 writer.add_document(owner=unicode(repo.contact),
105
114 repository=safe_unicode(repo.name),
106 try:
115 path=safe_unicode(path),
107 os.stat(path)
116 content=u_content,
108 writer.add_document(owner=unicode(repo.contact),
117 modtime=mktime(node.last_changeset.date.timetuple()),
109 repository=safe_unicode(repo.name),
118 extension=node.extension)
110 path=safe_unicode(path),
111 content=u_content,
112 modtime=os.path.getmtime(path),
113 extension=ext)
114 except OSError, e:
115 import errno
116 if e.errno == errno.ENOENT:
117 log.debug('path %s does not exist or is a broken symlink' % path)
118 else:
119 raise e
120
119
121
120
122 def build_index(self):
121 def build_index(self):
123 if os.path.exists(IDX_LOCATION):
122 if os.path.exists(IDX_LOCATION):
124 log.debug('removing previos index')
123 log.debug('removing previous index')
125 rmtree(IDX_LOCATION)
124 rmtree(IDX_LOCATION)
126
125
127 if not os.path.exists(IDX_LOCATION):
126 if not os.path.exists(IDX_LOCATION):
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now