##// END OF EJS Templates
when indexing changesets use the raw_id to locate the point from...
Indra Talip -
r2643:2ad50c44 beta
parent child Browse files
Show More
@@ -74,7 +74,6 b' FRAGMENTER = ContextFragmenter(200)'
74 74
75 75 CHGSETS_SCHEMA = Schema(
76 76 raw_id=ID(unique=True, stored=True),
77 revision=NUMERIC(unique=True, stored=True),
78 77 last=BOOLEAN(),
79 78 owner=TEXT(),
80 79 repository=ID(unique=True, stored=True),
@@ -168,23 +168,34 b' class WhooshIndexingDaemon(object):'
168 168 )
169 169 return indexed, indexed_w_content
170 170
171 def index_changesets(self, writer, repo_name, repo, start_rev=0):
171 def index_changesets(self, writer, repo_name, repo, start_rev=None):
172 172 """
173 173 Add all changesets in the vcs repo starting at start_rev
174 174 to the index writer
175
176 :param writer: the whoosh index writer to add to
177 :param repo_name: name of the repository from whence the
178 changeset originates including the repository group
179 :param repo: the vcs repository instance to index changesets for,
180 the presumption is the repo has changesets to index
181 :param start_rev: the full sha id to start indexing from
182 if start_rev is None then index from the first changeset in
183 the repo
175 184 """
176 185
177 log.debug('indexing changesets in %s[%d:]' % (repo_name, start_rev))
186 if start_rev is None:
187 start_rev = repo[0].raw_id
188
189 log.debug('indexing changesets in %s starting at rev: %s' % (repo_name, start_rev))
178 190
179 191 indexed=0
180 for cs in repo[start_rev:]:
192 for cs in repo.get_changesets(start=start_rev):
181 193 writer.add_document(
182 194 raw_id=unicode(cs.raw_id),
183 195 owner=unicode(repo.contact),
184 196 repository=safe_unicode(repo_name),
185 197 author=cs.author,
186 198 message=cs.message,
187 revision=cs.revision,
188 199 last=cs.last,
189 200 added=u' '.join([node.path for node in cs.added]).lower(),
190 201 removed=u' '.join([node.path for node in cs.removed]).lower(),
@@ -214,21 +225,27 b' class WhooshIndexingDaemon(object):'
214 225 try:
215 226 for repo_name, repo in self.repo_paths.items():
216 227 # skip indexing if there aren't any revs in the repo
217 revs = repo.revisions
218 if len(revs) < 1:
228 num_of_revs = len(repo)
229 if num_of_revs < 1:
219 230 continue
220 231
221 232 qp = QueryParser('repository', schema=CHGSETS_SCHEMA)
222 233 q = qp.parse(u"last:t AND %s" % repo_name)
223 234
224 results = searcher.search(q, sortedby='revision')
235 results = searcher.search(q)
225 236
237 # default to scanning the entire repo
226 238 last_rev = 0
239 start_id = None
240
227 241 if len(results) > 0:
228 last_rev = results[0]['revision']
242 # assuming that there is only one result, if not this
243 # may require a full re-index.
244 start_id = results[0]['raw_id']
245 last_rev = repo.get_changeset(revision=start_id).revision
229 246
230 247 # there are new changesets to index or a new repo to index
231 if last_rev == 0 or len(revs) > last_rev + 1:
248 if last_rev == 0 or num_of_revs > last_rev + 1:
232 249 # delete the docs in the index for the previous last changeset(s)
233 250 for hit in results:
234 251 q = qp.parse(u"last:t AND %s AND raw_id:%s" %
@@ -236,7 +253,7 b' class WhooshIndexingDaemon(object):'
236 253 writer.delete_by_query(q)
237 254
238 255 # index from the previous last changeset + all new ones
239 self.index_changesets(writer, repo_name, repo, last_rev)
256 self.index_changesets(writer, repo_name, repo, start_id)
240 257 writer_is_dirty = True
241 258
242 259 finally:
@@ -72,7 +72,7 b' class TestSearchController(TestControlle'
72 72 def test_search_author(self):
73 73 self.log_user()
74 74 response = self.app.get(url(controller='search', action='index'),
75 {'q': 'author:marcin@python-blog.com revision:0',
75 {'q': 'author:marcin@python-blog.com raw_id:b986218ba1c9b0d6a259fac9b050b1724ed8e545',
76 76 'type': 'commit'})
77 77
78 78 response.mustcontain('1 results')
General Comments 0
You need to be logged in to leave comments. Login now