upstream/kallithea Commit - r2643:2ad50c44

when indexing changesets use the raw_id to locate the point from...

Indra Talip -

r2643:2ad50c44 beta

parent child

rhodecode/lib/indexers/__init__.py

0 0 -1

              CHGSETS_SCHEMA = Schema(
                  raw_id=ID(unique=True, stored=True),
-                 revision=NUMERIC(unique=True, stored=True),
                  last=BOOLEAN(),
                  owner=TEXT(),
                  repository=ID(unique=True, stored=True),

rhodecode/lib/indexers/daemon.py

0 +27 -10

                      )
                      return indexed, indexed_w_content
-                 def index_changesets(self, writer, repo_name, repo, start_rev=0):
+                 def index_changesets(self, writer, repo_name, repo, start_rev=None):
                      """
                      Add all changeset in the vcs repo starting at start_rev
                      to the index writer
+                     :param writer: the whoosh index writer to add to
+                     :param repo_name: name of the repository from whence the
+                       changeset originates including the repository group
+                     :param repo: the vcs repository instance to index changesets for,
+                       the presumption is the repo has changesets to index
+                     :param start_rev=None: the full sha id to start indexing from
+                       if start_rev is None then index from the first changeset in
+                       the repo
                      """
-                     log.debug('indexing changesets in %s[%d:]' % (repo_name, start_rev))
+                     if start_rev is None:
+                         start_rev = repo[0].raw_id
+                     log.debug('indexing changesets in %s starting at rev: %s' % (repo_name, start_rev))
                      indexed=0
-                     for cs in repo[start_rev:]:
+                     for cs in repo.get_changesets(start=start_rev):
                          writer.add_document(
                              raw_id=unicode(cs.raw_id),
                              owner=unicode(repo.contact),
                              repository=safe_unicode(repo_name),
                              author=cs.author,
                              message=cs.message,
-                             revision=cs.revision,
                              last=cs.last,
                              added=u' '.join([node.path for node in cs.added]).lower(),
                              removed=u' '.join([node.path for node in cs.removed]).lower(),
                          try:
                              for repo_name, repo in self.repo_paths.items():
                                  # skip indexing if there aren't any revs in the repo
-                                 revs = repo.revisions
-                                 if len(revs) < 1:
+                                 num_of_revs = len(repo)
+                                 if num_of_revs < 1:
                                      continue
                                  qp = QueryParser('repository', schema=CHGSETS_SCHEMA)
                                  q = qp.parse(u"last:t AND %s" % repo_name)
-                                 results = searcher.search(q, sortedby='revision')
+                                 results = searcher.search(q)
+                                 # default to scanning the entire repo
                                  last_rev = 0
+                                 start_id = None
                                  if len(results) > 0:
-                                     last_rev = results[0]['revision']
+                                     # assuming that there is only one result, if not this
+                                     # may require a full re-index.
+                                     start_id = results[0]['raw_id']
+                                     last_rev = repo.get_changeset(revision=start_id).revision
                                  # there are new changesets to index or a new repo to index
-                                 if last_rev == 0 or len(revs) > last_rev + 1:
+                                 if last_rev == 0 or num_of_revs > last_rev + 1:
                                      # delete the docs in the index for the previous last changeset(s)
                                      for hit in results:
                                          q = qp.parse(u"last:t AND %s AND raw_id:%s" %
                                          writer.delete_by_query(q)
                                      # index from the previous last changeset + all new ones
-                                     self.index_changesets(writer, repo_name, repo, last_rev)
+                                     self.index_changesets(writer, repo_name, repo, start_id)
                                      writer_is_dirty = True
                          finally:

rhodecode/tests/functional/test_search.py

0 +1 -1

                  def test_search_author(self):
                      self.log_user()
                      response = self.app.get(url(controller='search', action='index'),
-                                             {'q': 'author:marcin@python-blog.com revision:0',
+                                             {'q': 'author:marcin@python-blog.com raw_id:b986218ba1c9b0d6a259fac9b050b1724ed8e545',
                                               'type': 'commit'})
                      response.mustcontain('1 results')

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Go to home page
g g	Go to my private gists page
g G	Go to my public gists page
g 0-9	Go to bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Go to summary page
g c	Go to changelog page
g f	Go to files page
g F	Go to files page with file search activated
g p	Go to pull requests page
g o	Go to repository settings
g O	Go to repository access permissions settings
t s	Toggle sidebar on some pages