##// END OF EJS Templates
fixed reindexing, and made some optimizations to reuse repo instances from the repo scan list.
marcink -
r561:5f3b967d default
parent child Browse files
Show More
@@ -40,7 +40,7 b' from shutil import rmtree'
40 40 from rhodecode.lib.indexers import INDEX_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME
41 41
42 42 from time import mktime
43 from vcs.backends import hg
43 from vcs.exceptions import ChangesetError
44 44
45 45 import logging
46 46
@@ -72,6 +72,7 b' class WhooshIndexingDaemon(object):'
72 72 def __init__(self, indexname='HG_INDEX', repo_location=None):
73 73 self.indexname = indexname
74 74 self.repo_location = repo_location
75 self.repo_paths = scan_paths(self.repo_location)
75 76 self.initial = False
76 77 if not os.path.isdir(IDX_LOCATION):
77 78 os.mkdir(IDX_LOCATION)
@@ -79,27 +80,32 b' class WhooshIndexingDaemon(object):'
79 80 ' yet exist running full build')
80 81 self.initial = True
81 82
82 def get_paths(self, root_dir):
83 def get_paths(self, repo):
83 84 """
84 85 recursive walk in root dir and return a set of all path in that dir
85 86 based on repository walk function
86 87 """
87 repo = hg.MercurialRepository(root_dir)
88 88 index_paths_ = set()
89 89 for topnode, dirs, files in repo.walk('/', 'tip'):
90 90 for f in files:
91 index_paths_.add(jn(root_dir, f.path))
91 index_paths_.add(jn(repo.path, f.path))
92 92 for dir in dirs:
93 93 for f in files:
94 index_paths_.add(jn(root_dir, f.path))
94 index_paths_.add(jn(repo.path, f.path))
95 95
96 96 return index_paths_
97
98
97
98 def get_node(self, repo, path):
99 n_path = path[len(repo.path) + 1:]
100 node = repo.get_changeset().get_node(n_path)
101 return node
102
103 def get_node_mtime(self, node):
104 return mktime(node.last_changeset.date.timetuple())
105
99 106 def add_doc(self, writer, path, repo):
100 107 """Adding doc to writer"""
101 n_path = path[len(repo.path) + 1:]
102 node = repo.get_changeset().get_node(n_path)
108 node = self.get_node(repo, path)
103 109
104 110 #we just index the content of chosen files
105 111 if node.extension in INDEX_EXTENSIONS:
@@ -114,7 +120,7 b' class WhooshIndexingDaemon(object):'
114 120 repository=safe_unicode(repo.name),
115 121 path=safe_unicode(path),
116 122 content=u_content,
117 modtime=mktime(node.last_changeset.date.timetuple()),
123 modtime=self.get_node_mtime(node),
118 124 extension=node.extension)
119 125
120 126
@@ -129,13 +135,14 b' class WhooshIndexingDaemon(object):'
129 135 idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
130 136 writer = idx.writer()
131 137
132 for cnt, repo in enumerate(scan_paths(self.repo_location).values()):
138 for cnt, repo in enumerate(self.repo_paths.values()):
133 139 log.debug('building index @ %s' % repo.path)
134 140
135 for idx_path in self.get_paths(repo.path):
141 for idx_path in self.get_paths(repo):
136 142 self.add_doc(writer, idx_path, repo)
143
144 log.debug('>> COMMITING CHANGES <<')
137 145 writer.commit(merge=True)
138
139 146 log.debug('>>> FINISHED BUILDING INDEX <<<')
140 147
141 148
@@ -155,42 +162,41 b' class WhooshIndexingDaemon(object):'
155 162 for fields in reader.all_stored_fields():
156 163 indexed_path = fields['path']
157 164 indexed_paths.add(indexed_path)
158
159 if not os.path.exists(indexed_path):
165
166 repo = self.repo_paths[fields['repository']]
167
168 try:
169 node = self.get_node(repo, indexed_path)
170 except ChangesetError:
160 171 # This file was deleted since it was indexed
161 172 log.debug('removing from index %s' % indexed_path)
162 173 writer.delete_by_term('path', indexed_path)
163 174
164 175 else:
165 # Check if this file was changed since it
166 # was indexed
176 # Check if this file was changed since it was indexed
167 177 indexed_time = fields['modtime']
168
169 mtime = os.path.getmtime(indexed_path)
170
178 mtime = self.get_node_mtime(node)
171 179 if mtime > indexed_time:
172
173 180 # The file has changed, delete it and add it to the list of
174 181 # files to reindex
175 182 log.debug('adding to reindex list %s' % indexed_path)
176 183 writer.delete_by_term('path', indexed_path)
177 184 to_index.add(indexed_path)
178 #writer.commit()
179 185
180 186 # Loop over the files in the filesystem
181 187 # Assume we have a function that gathers the filenames of the
182 188 # documents to be indexed
183 for repo in scan_paths(self.repo_location).values():
184 for path in self.get_paths(repo.path):
189 for repo in self.repo_paths.values():
190 for path in self.get_paths(repo):
185 191 if path in to_index or path not in indexed_paths:
186 192 # This is either a file that's changed, or a new file
187 193 # that wasn't indexed before. So index it!
188 194 self.add_doc(writer, path, repo)
189 log.debug('reindexing %s' % path)
190
195 log.debug('re indexing %s' % path)
196
197 log.debug('>> COMMITING CHANGES <<')
191 198 writer.commit(merge=True)
192 #idx.optimize()
193 log.debug('>>> FINISHED <<<')
199 log.debug('>>> FINISHED REBUILDING INDEX <<<')
194 200
195 201 def run(self, full_index=False):
196 202 """Run daemon"""
General Comments 0
You need to be logged in to leave comments. Login now