##// END OF EJS Templates
when indexing changesets use the raw_id to locate the point from...
Indra Talip -
r2643:2ad50c44 beta
parent child Browse files
Show More
@@ -1,265 +1,264
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.indexers.__init__
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 Whoosh indexing module for RhodeCode
7 7
8 8 :created_on: Aug 17, 2010
9 9 :author: marcink
10 10 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
11 11 :license: GPLv3, see COPYING for more details.
12 12 """
13 13 # This program is free software: you can redistribute it and/or modify
14 14 # it under the terms of the GNU General Public License as published by
15 15 # the Free Software Foundation, either version 3 of the License, or
16 16 # (at your option) any later version.
17 17 #
18 18 # This program is distributed in the hope that it will be useful,
19 19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 21 # GNU General Public License for more details.
22 22 #
23 23 # You should have received a copy of the GNU General Public License
24 24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 25 import os
26 26 import sys
27 27 import traceback
28 28 import logging
29 29 from os.path import dirname as dn, join as jn
30 30
31 31 #to get the rhodecode import
32 32 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
33 33
34 34 from string import strip
35 35 from shutil import rmtree
36 36
37 37 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
38 38 from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType
39 39 from whoosh.index import create_in, open_dir
40 40 from whoosh.formats import Characters
41 41 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
42 42
43 43 from webhelpers.html.builder import escape, literal
44 44 from sqlalchemy import engine_from_config
45 45
46 46 from rhodecode.model import init_model
47 47 from rhodecode.model.scm import ScmModel
48 48 from rhodecode.model.repo import RepoModel
49 49 from rhodecode.config.environment import load_environment
50 50 from rhodecode.lib.utils2 import LazyProperty
51 51 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\
52 52 load_rcextensions
53 53
54 54 log = logging.getLogger(__name__)
55 55
# CUSTOM ANALYZER wordsplit + lowercase filter
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()

#INDEX SCHEMA DEFINITION
# Schema for the per-file content index: one document per indexed file.
SCHEMA = Schema(
    fileid=ID(unique=True),      # unique key: full filesystem path of the file
    owner=TEXT(),
    repository=TEXT(stored=True),
    path=TEXT(stored=True),
    content=FieldType(format=Characters(), analyzer=ANALYZER,
                      scorable=True, stored=True),
    modtime=STORED(),            # node mtime, used for incremental re-indexing
    extension=TEXT(stored=True)
)

IDX_NAME = 'HG_INDEX'
# highlight() output wraps matches in <span>, separated by a "break" marker
FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
FRAGMENTER = ContextFragmenter(200)

# Schema for the changeset (commit) index: one document per changeset.
# Changesets are identified by their full sha (raw_id), not by a numeric
# revision, so the index stays valid across repos where numbering differs.
CHGSETS_SCHEMA = Schema(
    raw_id=ID(unique=True, stored=True),
    last=BOOLEAN(),              # marks the tip changeset(s) of a repository
    owner=TEXT(),
    repository=ID(unique=True, stored=True),
    author=TEXT(stored=True),
    message=FieldType(format=Characters(), analyzer=ANALYZER,
                      scorable=True, stored=True),
    parents=TEXT(),
    added=TEXT(),
    removed=TEXT(),
    changed=TEXT(),
)

CHGSET_IDX_NAME = 'CHGSET_INDEX'
91 90
class MakeIndex(BasePasterCommand):
    """
    Paster command that builds (or incrementally updates) the whoosh
    full text search indexes for the repositories configured in the
    given ini file.
    """

    max_args = 1
    min_args = 1

    usage = "CONFIG_FILE"
    summary = "Creates index for full text search given configuration file"
    group_name = "RhodeCode"
    takes_config_file = -1
    parser = Command.standard_parser(verbose=True)

    def command(self):
        """
        Set up the application environment from the ini file and run the
        whoosh indexing daemon under a lock file, so that only one
        indexer can run against the index directory at a time.
        """
        logging.config.fileConfig(self.path_to_ini_file)
        from pylons import config
        add_cache(config)
        engine = engine_from_config(config, 'sqlalchemy.db1.')
        init_model(engine)
        index_location = config['index_dir']
        repo_location = self.options.repo_location \
            if self.options.repo_location else RepoModel().repos_path
        repo_list = map(strip, self.options.repo_list.split(',')) \
            if self.options.repo_list else None
        repo_update_list = map(strip, self.options.repo_update_list.split(',')) \
            if self.options.repo_update_list else None
        load_rcextensions(config['here'])
        #======================================================================
        # WHOOSH DAEMON
        #======================================================================
        from rhodecode.lib.pidlock import LockHeld, DaemonLock
        from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
        try:
            # the lock prevents two indexers from mutating the same index
            l = DaemonLock(file_=jn(dn(dn(index_location)), 'make_index.lock'))
            WhooshIndexingDaemon(index_location=index_location,
                                 repo_location=repo_location,
                                 repo_list=repo_list,
                                 repo_update_list=repo_update_list)\
                .run(full_index=self.options.full_index)
            l.release()
        except LockHeld:
            # another indexer is already running; bail out non-zero
            sys.exit(1)

    def update_parser(self):
        """Register the command line options this command accepts."""
        self.parser.add_option('--repo-location',
                          action='store',
                          dest='repo_location',
                          help="Specifies repositories location to index OPTIONAL",
                          )
        self.parser.add_option('--index-only',
                          action='store',
                          dest='repo_list',
                          help="Specifies a comma separated list of repositories "
                                "to build index on. If not given all repositories "
                                "are scanned for indexing. OPTIONAL",
                          )
        self.parser.add_option('--update-only',
                          action='store',
                          dest='repo_update_list',
                          help="Specifies a comma separated list of repositories "
                                "to re-build index on. OPTIONAL",
                          )
        self.parser.add_option('-f',
                          action='store_true',
                          dest='full_index',
                          help="Specifies that index should be made full i.e."
                                " destroy old and build from scratch",
                          default=False)
159 158
class WhooshResultWrapper(object):
    """
    Lazy wrapper around a whoosh matcher that yields stored fields for
    each hit, optionally enriched with highlighted content snippets.

    :param search_type: 'content' or 'message' enables highlighting;
        other values pass the stored fields through untouched
    :param searcher: whoosh searcher used to resolve stored fields
    :param matcher: whoosh matcher positioned at the first hit
    :param highlight_items: terms to highlight in the results
    :param repo_location: base path used to strip repo prefix from file paths
    """
    def __init__(self, search_type, searcher, matcher, highlight_items,
                 repo_location):
        self.search_type = search_type
        self.searcher = searcher
        self.matcher = matcher
        self.highlight_items = highlight_items
        self.fragment_size = 200
        self.repo_location = repo_location

    @LazyProperty
    def doc_ids(self):
        """
        Drain the matcher once and cache [docnum, chunks] pairs, where
        chunks are the (start, end) offsets to excerpt from the content.
        """
        docs_id = []
        while self.matcher.is_active():
            docnum = self.matcher.id()
            chunks = list(self.get_chunks())
            docs_id.append([docnum, chunks])
            self.matcher.next()
        return docs_id

    def __str__(self):
        return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))

    def __repr__(self):
        return self.__str__()

    def __len__(self):
        return len(self.doc_ids)

    def __iter__(self):
        """
        Allows Iteration over results,and lazy generate content

        *Requires* implementation of ``__getitem__`` method.
        """
        for docid in self.doc_ids:
            yield self.get_full_content(docid)

    def __getitem__(self, key):
        """
        Indexing and slicing of resultWrapper.

        Accepts both a slice and a single integer index; the original
        implementation assumed a slice and raised AttributeError for ints.
        """
        if isinstance(key, slice):
            i, j = key.start, key.stop
            return [self.get_full_content(docid)
                    for docid in self.doc_ids[i:j]]
        return self.get_full_content(self.doc_ids[key])

    def get_full_content(self, docid):
        """
        Resolve a [docnum, chunks] pair into the stored fields dict,
        adding highlighted excerpts depending on the search type.
        """
        res = self.searcher.stored_fields(docid[0])
        log.debug('result: %s' % res)
        if self.search_type == 'content':
            full_repo_path = jn(self.repo_location, res['repository'])
            f_path = res['path'].split(full_repo_path)[-1]
            f_path = f_path.lstrip(os.sep)
            content_short = self.get_short_content(res, docid[1])
            res.update({'content_short': content_short,
                        'content_short_hl': self.highlight(content_short),
                        'f_path': f_path
                        })
        elif self.search_type == 'message':
            res.update({'message_hl': self.highlight(res['message'])})

        log.debug('result: %s' % res)

        return res

    def get_short_content(self, res, chunks):
        """Join the (start, end) chunk slices of the stored content."""
        return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])

    def get_chunks(self):
        """
        Smart function that implements chunking the content
        but not overlap chunks so it doesn't highlight the same
        close occurrences twice.
        """
        memory = [(0, 0)]
        if self.matcher.supports('positions'):
            for span in self.matcher.spans():
                start = span.startchar or 0
                end = span.endchar or 0
                start_offseted = max(0, start - self.fragment_size)
                end_offseted = end + self.fragment_size

                # clamp to the previous chunk's end so chunks never overlap
                if start_offseted < memory[-1][1]:
                    start_offseted = memory[-1][1]
                memory.append((start_offseted, end_offseted,))
                yield (start_offseted, end_offseted,)

    def highlight(self, content, top=5):
        """Return HTML-highlighted *content*; only for content/message."""
        if self.search_type not in ['content', 'message']:
            return ''
        hl = highlight(
            text=content,
            terms=self.highlight_items,
            analyzer=ANALYZER,
            fragmenter=FRAGMENTER,
            formatter=FORMATTER,
            top=top
        )
        return hl
@@ -1,373 +1,390
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.indexers.daemon
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 A daemon will read from task table and run tasks
7 7
8 8 :created_on: Jan 26, 2010
9 9 :author: marcink
10 10 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
11 11 :license: GPLv3, see COPYING for more details.
12 12 """
13 13 # This program is free software: you can redistribute it and/or modify
14 14 # it under the terms of the GNU General Public License as published by
15 15 # the Free Software Foundation, either version 3 of the License, or
16 16 # (at your option) any later version.
17 17 #
18 18 # This program is distributed in the hope that it will be useful,
19 19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 21 # GNU General Public License for more details.
22 22 #
23 23 # You should have received a copy of the GNU General Public License
24 24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 25 from __future__ import with_statement
26 26
27 27 import os
28 28 import sys
29 29 import logging
30 30 import traceback
31 31
32 32 from shutil import rmtree
33 33 from time import mktime
34 34
35 35 from os.path import dirname as dn
36 36 from os.path import join as jn
37 37
38 38 #to get the rhodecode import
39 39 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
40 40 sys.path.append(project_path)
41 41
42 42 from rhodecode.config.conf import INDEX_EXTENSIONS
43 43 from rhodecode.model.scm import ScmModel
44 44 from rhodecode.lib.utils2 import safe_unicode
45 45 from rhodecode.lib.indexers import SCHEMA, IDX_NAME, CHGSETS_SCHEMA, CHGSET_IDX_NAME
46 46
47 47 from rhodecode.lib.vcs.exceptions import ChangesetError, RepositoryError, \
48 48 NodeDoesNotExistError
49 49
50 50 from whoosh.index import create_in, open_dir, exists_in
51 51 from whoosh.query import *
52 52 from whoosh.qparser import QueryParser
53 53
54 54 log = logging.getLogger('whoosh_indexer')
55 55
56 56
class WhooshIndexingDaemon(object):
    """
    Daemon for atomic indexing jobs.

    Builds or incrementally updates two whoosh indexes per index
    location: a per-file content index (IDX_NAME) and a changeset
    index (CHGSET_IDX_NAME).
    """

    def __init__(self, indexname=IDX_NAME, index_location=None,
                 repo_location=None, sa=None, repo_list=None,
                 repo_update_list=None):
        self.indexname = indexname

        self.index_location = index_location
        if not index_location:
            raise Exception('You have to provide index location')

        self.repo_location = repo_location
        if not repo_location:
            raise Exception('You have to provide repositories location')

        self.repo_paths = ScmModel(sa).repo_scan(self.repo_location)

        #filter repo list
        if repo_list:
            self.filtered_repo_paths = {}
            for repo_name, repo in self.repo_paths.items():
                if repo_name in repo_list:
                    self.filtered_repo_paths[repo_name] = repo

            self.repo_paths = self.filtered_repo_paths

        #filter update repo list
        self.filtered_repo_update_paths = {}
        if repo_update_list:
            self.filtered_repo_update_paths = {}
            for repo_name, repo in self.repo_paths.items():
                if repo_name in repo_update_list:
                    self.filtered_repo_update_paths[repo_name] = repo
            self.repo_paths = self.filtered_repo_update_paths

        # decide between a full build and an incremental update: a full
        # build is needed when either of the two indexes is missing
        self.initial = True
        if not os.path.isdir(self.index_location):
            os.makedirs(self.index_location)
            log.info('Cannot run incremental index since it does not'
                     ' yet exist running full build')
        elif not exists_in(self.index_location, IDX_NAME):
            log.info('Running full index build as the file content'
                     ' index does not exist')
        elif not exists_in(self.index_location, CHGSET_IDX_NAME):
            log.info('Running full index build as the changeset'
                     ' index does not exist')
        else:
            self.initial = False

    def get_paths(self, repo):
        """
        recursive walk in root dir and return a set of all path in that dir
        based on repository walk function
        """
        index_paths_ = set()
        try:
            tip = repo.get_changeset('tip')
            for topnode, dirs, files in tip.walk('/'):
                for f in files:
                    index_paths_.add(jn(repo.path, f.path))

        except RepositoryError:
            # e.g. an empty repository has no tip; index nothing
            log.debug(traceback.format_exc())
        return index_paths_

    def get_node(self, repo, path):
        """Return the vcs node for *path* (absolute) at the repo tip."""
        n_path = path[len(repo.path) + 1:]
        node = repo.get_changeset().get_node(n_path)
        return node

    def get_node_mtime(self, node):
        """Last-change time of *node* as a unix timestamp."""
        return mktime(node.last_changeset.date.timetuple())

    def add_doc(self, writer, path, repo, repo_name):
        """
        Adding doc to writer this function itself fetches data from
        the instance of vcs backend
        """

        node = self.get_node(repo, path)
        indexed = indexed_w_content = 0
        # we just index the content of chosen files, and skip binary files
        if node.extension in INDEX_EXTENSIONS and not node.is_binary:
            u_content = node.content
            if not isinstance(u_content, unicode):
                log.warning('  >> %s Could not get this content as unicode '
                            'replacing with empty content' % path)
                u_content = u''
            else:
                log.debug('    >> %s [WITH CONTENT]' % path)
                indexed_w_content += 1

        else:
            log.debug('    >> %s' % path)
            # just index file name without it's content
            u_content = u''
            indexed += 1

        p = safe_unicode(path)
        writer.add_document(
            fileid=p,
            owner=unicode(repo.contact),
            repository=safe_unicode(repo_name),
            path=p,
            content=u_content,
            modtime=self.get_node_mtime(node),
            extension=node.extension
        )
        return indexed, indexed_w_content

    def index_changesets(self, writer, repo_name, repo, start_rev=None):
        """
        Add all changeset in the vcs repo starting at start_rev
        to the index writer

        :param writer: the whoosh index writer to add to
        :param repo_name: name of the repository from whence the
          changeset originates including the repository group
        :param repo: the vcs repository instance to index changesets for,
          the presumption is the repo has changesets to index
        :param start_rev=None: the full sha id to start indexing from
          if start_rev is None then index from the first changeset in
          the repo
        """

        if start_rev is None:
            start_rev = repo[0].raw_id

        log.debug('indexing changesets in %s starting at rev: %s' %
                  (repo_name, start_rev))

        indexed = 0
        for cs in repo.get_changesets(start=start_rev):
            writer.add_document(
                raw_id=unicode(cs.raw_id),
                owner=unicode(repo.contact),
                repository=safe_unicode(repo_name),
                author=cs.author,
                message=cs.message,
                last=cs.last,
                added=u' '.join([node.path for node in cs.added]).lower(),
                removed=u' '.join([node.path for node in cs.removed]).lower(),
                changed=u' '.join([node.path for node in cs.changed]).lower(),
                # NOTE: use a distinct name for the comprehension variable;
                # the original reused `cs`, which leaks and rebinds the loop
                # variable in python2 list comprehensions
                parents=u' '.join([p.raw_id for p in cs.parents]),
            )
            indexed += 1

        log.debug('indexed %d changesets for repo %s' % (indexed, repo_name))

    def index_files(self, file_idx_writer, repo_name, repo):
        """Add every indexable file of *repo* to the file index writer."""
        i_cnt = iwc_cnt = 0
        log.debug('building index for [%s]' % repo.path)
        for idx_path in self.get_paths(repo):
            i, iwc = self.add_doc(file_idx_writer, idx_path, repo, repo_name)
            i_cnt += i
            iwc_cnt += iwc

        log.debug('added %s files %s with content for repo %s' %
                  (i_cnt + iwc_cnt, iwc_cnt, repo.path))

    def update_changeset_index(self):
        """
        Incrementally index new changesets of every repo, starting from
        the last changeset recorded in the index (found via its raw_id).
        """
        idx = open_dir(self.index_location, indexname=CHGSET_IDX_NAME)

        with idx.searcher() as searcher:
            writer = idx.writer()
            writer_is_dirty = False
            try:
                for repo_name, repo in self.repo_paths.items():
                    # skip indexing if there aren't any revs in the repo
                    num_of_revs = len(repo)
                    if num_of_revs < 1:
                        continue

                    qp = QueryParser('repository', schema=CHGSETS_SCHEMA)
                    q = qp.parse(u"last:t AND %s" % repo_name)

                    results = searcher.search(q)

                    # default to scanning the entire repo
                    last_rev = 0
                    start_id = None

                    if len(results) > 0:
                        # assuming that there is only one result, if not this
                        # may require a full re-index.
                        start_id = results[0]['raw_id']
                        last_rev = repo.get_changeset(revision=start_id).revision

                    # there are new changesets to index or a new repo to index
                    if last_rev == 0 or num_of_revs > last_rev + 1:
                        # delete the docs in the index for the previous last changeset(s)
                        for hit in results:
                            q = qp.parse(u"last:t AND %s AND raw_id:%s" %
                                         (repo_name, hit['raw_id']))
                            writer.delete_by_query(q)

                        # index from the previous last changeset + all new ones
                        self.index_changesets(writer, repo_name, repo, start_id)
                        writer_is_dirty = True

            finally:
                if writer_is_dirty:
                    log.debug('>> COMMITING CHANGES TO CHANGESET INDEX<<')
                    writer.commit(merge=True)
                    log.debug('>> COMMITTED CHANGES TO CHANGESET INDEX<<')
                else:
                    # bugfix: the original said `writer.cancel` (no call),
                    # which never released the writer
                    writer.cancel()

    def update_file_index(self):
        """
        Incrementally update the file content index: re-index changed
        files, drop deleted ones and add files not yet indexed.
        """
        log.debug((u'STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s '
                   'AND REPOS %s') % (INDEX_EXTENSIONS, self.repo_paths.keys()))

        idx = open_dir(self.index_location, indexname=self.indexname)
        # The set of all paths in the index
        indexed_paths = set()
        # The set of all paths we need to re-index
        to_index = set()

        writer = idx.writer()
        writer_is_dirty = False
        try:
            with idx.reader() as reader:

                # Loop over the stored fields in the index
                for fields in reader.all_stored_fields():
                    indexed_path = fields['path']
                    indexed_repo_path = fields['repository']
                    indexed_paths.add(indexed_path)

                    if not indexed_repo_path in self.filtered_repo_update_paths:
                        continue

                    repo = self.repo_paths[indexed_repo_path]

                    try:
                        node = self.get_node(repo, indexed_path)
                        # Check if this file was changed since it was indexed
                        indexed_time = fields['modtime']
                        mtime = self.get_node_mtime(node)
                        if mtime > indexed_time:
                            # The file has changed, delete it and add it to the list of
                            # files to reindex
                            log.debug('adding to reindex list %s mtime: %s vs %s' % (
                                indexed_path, mtime, indexed_time)
                            )
                            writer.delete_by_term('fileid', indexed_path)
                            writer_is_dirty = True

                            to_index.add(indexed_path)
                    except (ChangesetError, NodeDoesNotExistError):
                        # This file was deleted since it was indexed
                        log.debug('removing from index %s' % indexed_path)
                        writer.delete_by_term('path', indexed_path)
                        writer_is_dirty = True

            # Loop over the files in the filesystem
            # Assume we have a function that gathers the filenames of the
            # documents to be indexed
            ri_cnt_total = 0  # indexed
            riwc_cnt_total = 0  # indexed with content
            for repo_name, repo in self.repo_paths.items():
                # skip indexing if there aren't any revisions
                if len(repo) < 1:
                    continue
                ri_cnt = 0  # indexed
                riwc_cnt = 0  # indexed with content
                for path in self.get_paths(repo):
                    path = safe_unicode(path)
                    if path in to_index or path not in indexed_paths:

                        # This is either a file that's changed, or a new file
                        # that wasn't indexed before. So index it!
                        i, iwc = self.add_doc(writer, path, repo, repo_name)
                        writer_is_dirty = True
                        log.debug('re indexing %s' % path)
                        ri_cnt += i
                        ri_cnt_total += 1
                        riwc_cnt += iwc
                        riwc_cnt_total += iwc
                log.debug('added %s files %s with content for repo %s' % (
                    ri_cnt + riwc_cnt, riwc_cnt, repo.path)
                )
            log.debug('indexed %s files in total and %s with content' % (
                ri_cnt_total, riwc_cnt_total)
            )
        finally:
            if writer_is_dirty:
                log.debug('>> COMMITING CHANGES <<')
                writer.commit(merge=True)
                log.debug('>>> FINISHED REBUILDING INDEX <<<')
            else:
                writer.cancel()

    def build_indexes(self):
        """Destroy any existing index and rebuild both indexes from scratch."""
        if os.path.exists(self.index_location):
            log.debug('removing previous index')
            rmtree(self.index_location)

        if not os.path.exists(self.index_location):
            os.mkdir(self.index_location)

        chgset_idx = create_in(self.index_location, CHGSETS_SCHEMA,
                               indexname=CHGSET_IDX_NAME)
        chgset_idx_writer = chgset_idx.writer()

        file_idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
        file_idx_writer = file_idx.writer()
        log.debug('BUILDING INDEX FOR EXTENSIONS %s '
                  'AND REPOS %s' % (INDEX_EXTENSIONS, self.repo_paths.keys()))

        for repo_name, repo in self.repo_paths.items():
            # skip indexing if there aren't any revisions
            if len(repo) < 1:
                continue

            self.index_files(file_idx_writer, repo_name, repo)
            self.index_changesets(chgset_idx_writer, repo_name, repo)

        log.debug('>> COMMITING CHANGES <<')
        file_idx_writer.commit(merge=True)
        chgset_idx_writer.commit(merge=True)
        log.debug('>>> FINISHED BUILDING INDEX <<<')

    def update_indexes(self):
        """Run both incremental index updates."""
        self.update_file_index()
        self.update_changeset_index()

    def run(self, full_index=False):
        """Run daemon"""
        if full_index or self.initial:
            self.build_indexes()
        else:
            self.update_indexes()
@@ -1,78 +1,78
1 1 import os
2 2 from rhodecode.tests import *
3 3 from nose.plugins.skip import SkipTest
4 4
5 5
class TestSearchController(TestController):
    """
    Functional tests for the search controller. These hit the search
    page over the test app and assert on rendered result counts and
    changeset ids, so they require the whoosh indexes to be built for
    the test repositories.
    """

    def test_index(self):
        # the search form should render its query input box
        self.log_user()
        response = self.app.get(url(controller='search', action='index'))

        self.assertTrue('class="small" id="q" name="q" type="text"' in
                        response.body)
        # Test response...

    def test_empty_search(self):
        # only meaningful when no index exists yet
        if os.path.isdir(self.index_location):
            raise SkipTest('skipped due to existing index')
        else:
            self.log_user()
            response = self.app.get(url(controller='search', action='index'),
                                    {'q': HG_REPO})
            self.assertTrue('There is no index to search in. '
                            'Please run whoosh indexer' in response.body)

    def test_normal_search(self):
        self.log_user()
        response = self.app.get(url(controller='search', action='index'),
                                {'q': 'def repo'})
        response.mustcontain('10 results')

    def test_repo_search(self):
        # scoped search with the repository: filter
        self.log_user()
        response = self.app.get(url(controller='search', action='index'),
                                {'q': 'repository:%s def test' % HG_REPO})

        response.mustcontain('4 results')

    def test_search_last(self):
        # commit-type search for the tip changeset marker
        self.log_user()
        response = self.app.get(url(controller='search', action='index'),
                                {'q': 'last:t', 'type': 'commit'})

        response.mustcontain('1 results')

    def test_search_commit_message(self):
        self.log_user()
        response = self.app.get(url(controller='search', action='index'),
                                {'q': 'bother to ask where to fetch repo during tests',
                                 'type': 'commit'})

        response.mustcontain('1 results')
        response.mustcontain('a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1')

    def test_search_commit_changed_file(self):
        self.log_user()
        response = self.app.get(url(controller='search', action='index'),
                                {'q': 'changed:tests/utils.py',
                                 'type': 'commit'})

        response.mustcontain('a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1')

    def test_search_commit_added_file(self):
        self.log_user()
        response = self.app.get(url(controller='search', action='index'),
                                {'q': 'added:README.rst',
                                 'type': 'commit'})

        response.mustcontain('1 results')
        response.mustcontain('3803844fdbd3b711175fc3da9bdacfcd6d29a6fb')

    def test_search_author(self):
        # changesets are looked up by full raw_id (sha), not revision number
        self.log_user()
        response = self.app.get(url(controller='search', action='index'),
                                {'q': 'author:marcin@python-blog.com raw_id:b986218ba1c9b0d6a259fac9b050b1724ed8e545',
                                 'type': 'commit'})

        response.mustcontain('1 results')
General Comments 0
You need to be logged in to leave comments. Login now