##// END OF EJS Templates
Fixed whoosh daemon, for deprecated walk method
marcink -
r947:99850ac8 beta
parent child Browse files
Show More
@@ -1,525 +1,526 b''
1 1 """Helper functions
2 2
3 3 Consists of functions to typically be used within templates, but also
4 4 available to Controllers. This module is available to both as 'h'.
5 5 """
6 6 import random
7 7 import hashlib
8 8 from pygments.formatters import HtmlFormatter
9 9 from pygments import highlight as code_highlight
10 10 from pylons import url, app_globals as g
11 11 from pylons.i18n.translation import _, ungettext
12 12 from vcs.utils.annotate import annotate_highlight
13 13 from webhelpers.html import literal, HTML, escape
14 14 from webhelpers.html.tools import *
15 15 from webhelpers.html.builder import make_tag
16 16 from webhelpers.html.tags import auto_discovery_link, checkbox, css_classes, \
17 17 end_form, file, form, hidden, image, javascript_link, link_to, link_to_if, \
18 18 link_to_unless, ol, required_legend, select, stylesheet_link, submit, text, \
19 19 password, textarea, title, ul, xml_declaration, radio
20 20 from webhelpers.html.tools import auto_link, button_to, highlight, js_obfuscate, \
21 21 mail_to, strip_links, strip_tags, tag_re
22 22 from webhelpers.number import format_byte_size, format_bit_size
23 23 from webhelpers.pylonslib import Flash as _Flash
24 24 from webhelpers.pylonslib.secure_form import secure_form
25 25 from webhelpers.text import chop_at, collapse, convert_accented_entities, \
26 26 convert_misc_entities, lchop, plural, rchop, remove_formatting, \
27 27 replace_whitespace, urlify, truncate, wrap_paragraphs
28 28 from webhelpers.date import time_ago_in_words
29 29
30 30 from webhelpers.html.tags import _set_input_attrs, _set_id_attr, \
31 31 convert_boolean_attrs, NotGiven
32 32
def _reset(name, value=None, id=NotGiven, type="reset", **attrs):
    """Build a reset button as an html ``input`` tag

    :param name: name of the input
    :param value: value of the input
    :param id: id of the input, handed together with ``name`` to
        ``_set_id_attr``
    :param type: input type, ``reset`` by default
    :param attrs: any additional html attributes
    """
    # the helpers below fill ``attrs`` in place
    _set_input_attrs(attrs, type, name, value)
    _set_id_attr(attrs, id, name)
    convert_boolean_attrs(attrs, ["disabled"])

    button = HTML.input(**attrs)
    return button
40 40
41 41 reset = _reset
42 42
43 43
def get_token():
    """Return the current authentication token, creating one if one doesn't
    already exist.

    The token lives in the pylons session under ``_authentication_token``.
    A new token is the sha1 hex digest of 128 random bits taken from the
    operating system entropy source (``random.SystemRandom``) instead of the
    default, predictable pseudo random generator.

    :returns: the token stored in the session
    """
    token_key = "_authentication_token"
    from pylons import session
    if token_key not in session:
        # a secret token must not be guessable, so ask the OS for the bits
        seed = str(random.SystemRandom().getrandbits(128))
        session[token_key] = hashlib.sha1(seed.encode('ascii')).hexdigest()
        # only persist when the session object offers save()
        if hasattr(session, 'save'):
            session.save()
    return session[token_key]
59 59
class _GetError(object):
    """Get error from form_errors, and represent it as span wrapped error
    message

    :param field_name: field to fetch errors for
    :param form_errors: form errors dict
    :returns: ``literal`` with the error wrapped in
        ``<span class="error_msg">``, or None when there is no error for
        the given field
    """

    def __call__(self, field_name, form_errors):
        tmpl = """<span class="error_msg">%s</span>"""
        # `in` instead of the deprecated dict.has_key()
        if form_errors and field_name in form_errors:
            return literal(tmpl % form_errors.get(field_name))
        return None
72 72
73 73 get_error = _GetError()
74 74
def recursive_replace(str, replace=' '):
    """Collapse every run of the given sign to just one instance

    :param str: given string
    :param replace: char to find and replace multiple instances
    :returns: string in which ``replace`` never occurs twice in a row; when
        ``replace`` is empty the string is returned unchanged

    Examples::
        >>> recursive_replace("Mighty---Mighty-Bo--sstones",'-')
        'Mighty-Mighty-Bo-sstones'
    """

    # an empty sign is "found" everywhere and never shrinks the string,
    # which used to end in endless recursion
    if not replace:
        return str

    doubled = replace * 2
    # every pass halves the runs, repeat until no doubled sign is left
    while doubled in str:
        str = str.replace(doubled, replace)
    return str
91 91
class _ToolTip(object):
    """Helper for tooltips: calling the instance formats a tooltip text,
    ``activate`` returns the javascript that turns them on
    """

    def __call__(self, tooltip_title, trim_at=50):
        """Special function just to wrap our text into nice formatted
        autowrapped text

        :param tooltip_title: text of the tooltip, it gets html escaped
        :param trim_at: width handed to ``wrap_paragraphs``
        :returns: escaped, wrapped text with newlines turned into ``<br/>``
        """

        return wrap_paragraphs(escape(tooltip_title), trim_at)\
                       .replace('\n', '<br/>')

    def activate(self):
        """Adds tooltip mechanism to the given Html all tooltips have to have
        set class `tooltip` and set attribute `tooltip_title`.
        Then a tooltip will be generated based on that. All with yui js tooltip

        :returns: ``literal`` with the javascript snippet
        """

        # NOTE: the config object literal must not end with a trailing comma,
        # older (ES3) browsers treat it as a syntax error
        js = '''
        YAHOO.util.Event.onDOMReady(function(){
            function toolTipsId(){
                var ids = [];
                var tts = YAHOO.util.Dom.getElementsByClassName('tooltip');

                for (var i = 0; i < tts.length; i++) {
                    //if element doesn't not have and id autogenerate one for tooltip

                    if (!tts[i].id){
                        tts[i].id='tt'+i*100;
                    }
                    ids.push(tts[i].id);
                }
                return ids
            };
            var myToolTips = new YAHOO.widget.Tooltip("tooltip", {
                context: toolTipsId(),
                monitorresize:false,
                xyoffset :[0,0],
                autodismissdelay:300000,
                hidedelay:5,
                showdelay:20
            });

            // Set the text for the tooltip just before we display it. Lazy method
            myToolTips.contextTriggerEvent.subscribe(
                 function(type, args) {

                        var context = args[0];

                        //positioning of tooltip
                        var tt_w = this.element.clientWidth;//tooltip width
                        var tt_h = this.element.clientHeight;//tooltip height

                        var context_w = context.offsetWidth;
                        var context_h = context.offsetHeight;

                        var pos_x = YAHOO.util.Dom.getX(context);
                        var pos_y = YAHOO.util.Dom.getY(context);

                        var display_strategy = 'right';
                        var xy_pos = [0,0];
                        switch (display_strategy){

                            case 'top':
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                var cur_y = (pos_y-tt_h-4);
                                xy_pos = [cur_x,cur_y];
                                break;
                            case 'bottom':
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                var cur_y = pos_y+context_h+4;
                                xy_pos = [cur_x,cur_y];
                                break;
                            case 'left':
                                var cur_x = (pos_x-tt_w-4);
                                var cur_y = pos_y-((tt_h/2)-context_h/2);
                                xy_pos = [cur_x,cur_y];
                                break;
                            case 'right':
                                var cur_x = (pos_x+context_w+4);
                                var cur_y = pos_y-((tt_h/2)-context_h/2);
                                xy_pos = [cur_x,cur_y];
                                break;
                            default:
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                var cur_y = pos_y-tt_h-4;
                                xy_pos = [cur_x,cur_y];
                                break;

                        }

                        this.cfg.setProperty("xy",xy_pos);

                  });

            //Mouse out
            myToolTips.contextMouseOutEvent.subscribe(
                function(type, args) {
                    var context = args[0];

                });
        });
        '''
        return literal(js)
196 196
197 197 tooltip = _ToolTip()
198 198
class _FilesBreadCrumbs(object):
    """Builds the breadcrumbs navigation for the files browser"""

    def __call__(self, repo_name, rev, paths):
        """Return '/' joined links: the repository first, then one link for
        every non empty segment of ``paths``

        :param repo_name: name of repository, used as the first link
        :param rev: revision every link points at
        :param paths: '/' separated path inside the repository
        """
        root_link = link_to(repo_name, url('files_home',
                                           repo_name=repo_name,
                                           revision=rev, f_path=''))
        segments = paths.split('/')

        # each segment links to the path leading up to and including itself
        segment_links = [
            link_to(segment, url('files_home',
                                 repo_name=repo_name,
                                 revision=rev,
                                 f_path='/'.join(segments[:pos + 1])))
            for pos, segment in enumerate(segments)
            if segment != '']

        return literal('/'.join([root_link] + segment_links))
215 215
216 216 files_breadcrumbs = _FilesBreadCrumbs()
217 217
class CodeHtmlFormatter(HtmlFormatter):
    """Html formatter that puts every line of the source into its own div
    with id ``#S-<line number>``
    """

    def wrap(self, source, outfile):
        # outfile is not used here, the wrappers are only chained
        per_line = self._wrap_code(source)
        in_pre = self._wrap_pre(per_line)
        return self._wrap_div(in_pre)

    def _wrap_code(self, source):
        """Yield the items of ``source`` with the text part wrapped in a
        numbered div, numbering starts at 1
        """
        line_no = 0
        for flag, text in source:
            line_no += 1
            yield flag, '<div id="#S-%s">%s</div>' % (line_no, text)


def pygmentize(filenode, **kwargs):
    """Highlight content of the file node with pygments

    :param filenode: node providing ``content`` and ``lexer``
    :param kwargs: passed as they are to ``CodeHtmlFormatter``
    :returns: ``literal`` with the highlighted html
    """

    content = filenode.content
    lexer = filenode.lexer
    formatter = CodeHtmlFormatter(**kwargs)
    return literal(code_highlight(content, lexer, formatter))
236 236
def pygmentize_annotation(filenode, **kwargs):
    """pygmentize function for annotation

    Every changeset gets its own link color, the same changeset always
    gets the same color within one call.

    :param filenode: node passed to ``annotate_highlight``
    :param kwargs: passed as they are to ``annotate_highlight``
    :returns: ``literal`` with the annotated html
    """

    color_dict = {}

    def gen_color(n=10000):
        """generator for getting n of evenly distributed colors using
        hsv color and golden ratio. It always return same order of colors

        :returns: list with R, G, B values as strings
        """
        import colorsys
        golden_ratio = 0.618033988749895
        h = 0.22717784590367374

        for c in xrange(n):
            h += golden_ratio
            h %= 1
            HSV_tuple = [h, 0.95, 0.95]
            RGB_tuple = colorsys.hsv_to_rgb(*HSV_tuple)
            yield [str(int(x * 256)) for x in RGB_tuple]

    cgenerator = gen_color()

    def get_color_string(cs):
        # take the next color only for changesets not seen so far
        if cs in color_dict:
            col = color_dict[cs]
        else:
            col = color_dict[cs] = cgenerator.next()
        return "color: rgb(%s)! important;" % (', '.join(col))

    def url_func(changeset):
        # the date used to be followed by a stray closing </b> tag
        tooltip_html = "<div style='font-size:0.8em'><b>Author:</b>" + \
        " %s<br/><b>Date:</b> %s<br/><b>Message:</b> %s<br/></div>"

        tooltip_html = tooltip_html % (changeset.author,
                                       changeset.date,
                                       tooltip(changeset.message))
        lnk_format = '%5s:%s' % ('r%s' % changeset.revision,
                                 short_id(changeset.raw_id))
        uri = link_to(
                lnk_format,
                url('changeset_home', repo_name=changeset.repository.name,
                    revision=changeset.raw_id),
                style=get_color_string(changeset.raw_id),
                class_='tooltip',
                title=tooltip_html
              )

        uri += '\n'
        return uri
    return literal(annotate_highlight(filenode, url_func, **kwargs))
290 291
def repo_name_slug(value):
    """Return slug of name of repository
    This function is called on each creation/modification
    of repository to prevent bad names in repo

    :param value: name to make the slug from
    """

    # every one of these signs is turned into a dash
    forbidden = """=[]\\;'"<>,/~!@#$%^&*()+{}|: """

    slug = strip_tags(remove_formatting(value))
    for sign in forbidden:
        slug = slug.replace(sign, '-')

    # squeeze runs of dashes to one before the final collapse
    return collapse(recursive_replace(slug, '-'), '-')
305 306
def get_changeset_safe(repo, rev):
    """Get changeset from the repository, falling back to an empty one

    :param repo: instance of vcs ``BaseRepository``
    :param rev: revision passed to ``repo.get_changeset``
    :returns: the changeset, or ``EmptyChangeset`` when the repository
        raises ``RepositoryError``
    :raises Exception: when ``repo`` is not a ``BaseRepository``
    """
    from vcs.backends.base import BaseRepository
    from vcs.exceptions import RepositoryError
    if not isinstance(repo, BaseRepository):
        # the type used to be passed as a second exception argument and
        # never made it into the message
        raise Exception('You must pass an Repository '
                        'object as first argument got %s' % type(repo))

    try:
        cs = repo.get_changeset(rev)
    except RepositoryError:
        from rhodecode.lib.utils import EmptyChangeset
        cs = EmptyChangeset()
    return cs
319 320
320 321
321 322 flash = _Flash()
322 323
323 324
324 325 #==============================================================================
325 326 # MERCURIAL FILTERS available via h.
326 327 #==============================================================================
327 328 from mercurial import util
328 329 from mercurial.templatefilters import person as _person
329 330
def _age(curdate):
    """turns a datetime into an age string.

    :param curdate: datetime to describe, anything false gives ''
    :returns: ``time_ago_in_words`` result followed by the translated
        'ago', or the translated 'just now' when no scale fits
    """

    if not curdate:
        return ''

    from datetime import datetime

    agescales = [("year", 3600 * 24 * 365),
                 ("month", 3600 * 24 * 30),
                 ("day", 3600 * 24),
                 ("hour", 3600),
                 ("minute", 60),
                 ("second", 1), ]
    finest = len(agescales) - 1

    elapsed = datetime.now() - curdate
    elapsed_seconds = (elapsed.days * agescales[2][1]) + elapsed.seconds

    for idx, scale in enumerate(agescales):
        if scale[1] > elapsed_seconds:
            continue
        # granularity is the scale one step finer than the first that fits,
        # it stays at the last one when there is nothing finer
        granularity = agescales[min(idx + 1, finest)][0]
        return time_ago_in_words(curdate, granularity) + ' ' + _('ago')

    return _('just now')
355 356
def age(x):
    """Age string for the given datetime, see ``_age``"""
    return _age(x)


def capitalize(x):
    """``x`` with ``capitalize()`` applied"""
    return x.capitalize()


email = util.email


def email_or_none(x):
    """What ``util.email`` extracts from ``x``, or None when the extraction
    gives ``x`` back unchanged
    """
    extracted = util.email(x)
    if extracted != x:
        return extracted
    return None


def person(x):
    """``x`` run through the mercurial ``person`` template filter"""
    return _person(x)


def short_id(x):
    """First 12 signs of ``x``"""
    return x[:12]
362 363
363 364
def bool2icon(value):
    """Returns True/False values represented as small html image of true/false
    icons

    :param value: bool value
    :returns: img tag for True or False, any other value is returned as is
    """

    if value is True:
        icon, label = "/images/icons/accept.png", _('True')
    elif value is False:
        icon, label = "/images/icons/cancel.png", _('False')
    else:
        # not a real bool, nothing to represent
        return value

    return HTML.tag('img', src=icon, alt=label)
378 379
379 380
def action_parser(user_log):
    """This helper will map the specified string action into translated
    fancy names with icons and links

    The action is stored as ``<action>`` or ``<action>:<params>``. For
    ``push`` the params are comma separated revisions, for
    ``user_forked_repo`` the params are the name of the fork.

    :param user_log: user log instance
    :returns: ``literal`` with the description, text in square brackets is
        wrapped in a ``journal_highlight`` span
    """

    action = user_log.action
    action_params = ' '

    # split on the first colon only, a second colon in the params used to
    # break the unpacking with ValueError
    parts = action.split(':', 1)

    if len(parts) > 1:
        action, action_params = parts

    def get_cs_links():
        if action != 'push':
            return ''

        revs_limit = 5 #display this amount always
        revs_top_limit = 50 #show upto this amount of changesets hidden
        revs = action_params.split(',')
        repo_name = user_log.repository.repo_name
        from rhodecode.model.scm import ScmModel

        # one repository lookup for all revisions instead of one per revision
        repo = ScmModel().get(repo_name)

        def message(rev):
            return get_changeset_safe(repo, rev).message

        cs_links = " " + ', '.join([link_to(rev,
                url('changeset_home',
                repo_name=repo_name,
                revision=rev), title=tooltip(message(rev)),
                class_='tooltip') for rev in revs[:revs_limit]])
        if len(revs) > revs_limit:
            uniq_id = revs[0]
            html_tmpl = ('<span> %s '
            '<a class="show_more" id="_%s" href="#">%s</a> '
            '%s</span>')
            cs_links += html_tmpl % (_('and'), uniq_id, _('%s more') \
                                        % (len(revs) - revs_limit),
                                        _('revisions'))

            # NOTE(review): titles of the hidden links are not passed through
            # tooltip() like the visible ones above - confirm if intended
            html_tmpl = '<span id="%s" style="display:none"> %s </span>'
            cs_links += html_tmpl % (uniq_id, ', '.join([link_to(rev,
                url('changeset_home',
                repo_name=repo_name, revision=rev),
                title=message(rev), class_='tooltip')
                for rev in revs[revs_limit:revs_top_limit]]))

        return cs_links

    def get_fork_name():
        if action != 'user_forked_repo':
            return ''

        from rhodecode.model.scm import ScmModel
        repo_name = action_params
        repo = ScmModel().get(repo_name)
        if repo is None:
            # nothing to link to, show just the name
            return repo_name
        return link_to(action_params, url('summary_home',
                                          repo_name=repo.name,),
                                          title=repo.dbrepo.description)

    action_map = {
        'user_deleted_repo':_('User [deleted] repository'),
        'user_created_repo':_('User [created] repository'),
        'user_forked_repo':_('User [forked] repository as: %s') % get_fork_name(),
        'user_updated_repo':_('User [updated] repository'),
        'admin_deleted_repo':_('Admin [delete] repository'),
        'admin_created_repo':_('Admin [created] repository'),
        'admin_forked_repo':_('Admin [forked] repository'),
        'admin_updated_repo':_('Admin [updated] repository'),
        'push':_('[Pushed] %s') % get_cs_links(),
        'pull':_('[Pulled]'),
        'started_following_repo':_('User [started following] repository'),
        'stopped_following_repo':_('User [stopped following] repository'),
    }

    # unknown actions are shown as they are
    action_str = action_map.get(action, action)
    return literal(action_str.replace('[', '<span class="journal_highlight">')\
                   .replace(']', '</span>'))
458 459
def action_parser_icon(user_log):
    """Return img tag with the icon for the action of given user log

    :param user_log: user log instance, its action is ``<action>`` or
        ``<action>:<params>``, the params are ignored here
    :returns: ``literal`` with img tag, unknown actions use the action
        itself as the icon file name
    """
    # only the part before the first colon matters, taking it this way does
    # not break on params that have colons in them
    action = user_log.action.split(':', 1)[0]

    tmpl = """<img src="/images/icons/%s" alt="%s"/>"""
    icon_map = {
        'user_deleted_repo':'database_delete.png',
        'user_created_repo':'database_add.png',
        'user_forked_repo':'arrow_divide.png',
        'user_updated_repo':'database_edit.png',
        'admin_deleted_repo':'database_delete.png',
        'admin_created_repo':'database_add.png',
        'admin_forked_repo':'arrow_divide.png',
        'admin_updated_repo':'database_edit.png',
        'push':'script_add.png',
        'pull':'down_16.png',
        'started_following_repo':'heart_add.png',
        'stopped_following_repo':'heart_delete.png',
    }
    return literal(tmpl % (icon_map.get(action, action), action))
482 483
483 484
484 485 #==============================================================================
485 486 # PERMS
486 487 #==============================================================================
487 488 from rhodecode.lib.auth import HasPermissionAny, HasPermissionAll, \
488 489 HasRepoPermissionAny, HasRepoPermissionAll
489 490
490 491 #==============================================================================
491 492 # GRAVATAR URL
492 493 #==============================================================================
493 494 import hashlib
494 495 import urllib
495 496 from pylons import request
496 497
def gravatar_url(email_address, size=30):
    """Return gravatar url for given email address

    The secure gravatar host is used when the current request came over
    https. Addresses without a gravatar get an identicon.

    :param email_address: email address, None is treated as empty address
    :param size: size of the image, sent as the ``s`` parameter
    :returns: url of the avatar image
    """
    ssl_enabled = 'https' == request.environ.get('wsgi.url_scheme')
    default = 'identicon'
    baseurl_nossl = "http://www.gravatar.com/avatar/"
    baseurl_ssl = "https://secure.gravatar.com/avatar/"
    baseurl = baseurl_ssl if ssl_enabled else baseurl_nossl

    # gravatar hashes the trimmed, lower cased address
    normalized = (email_address or '').strip().lower()
    if isinstance(normalized, unicode):
        # md5 needs bytes, unicode with non ascii signs would fail in it
        normalized = normalized.encode('utf-8')

    # construct the url
    query = urllib.urlencode({'d':default, 's':str(size)})
    return baseurl + hashlib.md5(normalized).hexdigest() + "?" + query
510 511
def safe_unicode(str):
    """safe unicode function. In case of UnicodeDecode error we try to return
    unicode with errors replace, if this fails we return unicode with
    string_escape decoding

    :param str: value to convert
    :returns: unicode representation of the value
    """

    # the parameter hides the builtin str, so work on an alias and never
    # call the parameter itself like the previous fallback did
    value = str

    try:
        return unicode(value)
    except UnicodeDecodeError:
        pass

    try:
        return unicode(value, 'utf-8', 'replace')
    except (UnicodeDecodeError, TypeError):
        # TypeError comes for values that are not byte strings;
        # in that case just represent the value as escaped byte string
        raw = '%s' % (value,)
        return unicode(raw.encode('string_escape'))
@@ -1,236 +1,237 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.indexers.daemon
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 A daemon will read from task table and run tasks
7 7
8 8 :created_on: Jan 26, 2010
9 9 :author: marcink
10 10 :copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>
11 11 :license: GPLv3, see COPYING for more details.
12 12 """
13 13 # This program is free software; you can redistribute it and/or
14 14 # modify it under the terms of the GNU General Public License
15 15 # as published by the Free Software Foundation; version 2
16 16 # of the License or (at your option) any later version of the license.
17 17 #
18 18 # This program is distributed in the hope that it will be useful,
19 19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 21 # GNU General Public License for more details.
22 22 #
23 23 # You should have received a copy of the GNU General Public License
24 24 # along with this program; if not, write to the Free Software
25 25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
26 26 # MA 02110-1301, USA.
27 27
28 28 import sys
29 29 import os
30 30 import traceback
31 31 from os.path import dirname as dn
32 32 from os.path import join as jn
33 33
34 34 #to get the rhodecode import
35 35 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
36 36 sys.path.append(project_path)
37 37
38 38
39 39 from rhodecode.model.scm import ScmModel
40 40 from rhodecode.lib.helpers import safe_unicode
41 41 from whoosh.index import create_in, open_dir
42 42 from shutil import rmtree
43 43 from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME
44 44
45 45 from time import mktime
46 46 from vcs.exceptions import ChangesetError, RepositoryError
47 47
48 48 import logging
49 49
50 50 log = logging.getLogger('whooshIndexer')
51 51 # create logger
52 52 log.setLevel(logging.DEBUG)
53 53 log.propagate = False
54 54 # create console handler and set level to debug
55 55 ch = logging.StreamHandler()
56 56 ch.setLevel(logging.DEBUG)
57 57
58 58 # create formatter
59 59 formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
60 60
61 61 # add formatter to ch
62 62 ch.setFormatter(formatter)
63 63
64 64 # add ch to logger
65 65 log.addHandler(ch)
66 66
class WhooshIndexingDaemon(object):
    """
    Daemon for atomic jobs

    Builds and updates the whoosh index with files of the scanned
    repositories. ``run`` is the entry point.
    """

    def __init__(self, indexname='HG_INDEX', index_location=None,
                 repo_location=None, sa=None, repo_list=None):
        """
        :param indexname: name of the index opened by ``update_index``
        :param index_location: directory of the index, it is created when
            missing and a full build is scheduled then
        :param repo_location: directory scanned for repositories
        :param sa: passed to ``ScmModel``
        :param repo_list: optional names of repositories, when given only
            these are indexed
        :raises Exception: when index or repositories location is missing
        """
        self.indexname = indexname

        self.index_location = index_location
        if not index_location:
            raise Exception('You have to provide index location')

        self.repo_location = repo_location
        if not repo_location:
            raise Exception('You have to provide repositories location')

        self.repo_paths = ScmModel(sa).repo_scan(self.repo_location, None)

        if repo_list:
            filtered_repo_paths = {}
            for repo_name, repo in self.repo_paths.items():
                if repo_name in repo_list:
                    filtered_repo_paths[repo.name] = repo

            self.repo_paths = filtered_repo_paths

        self.initial = False
        if not os.path.isdir(self.index_location):
            os.makedirs(self.index_location)
            log.info('Cannot run incremental index since it does not'
                     ' yet exist running full build')
            self.initial = True

    def get_paths(self, repo):
        """recursive walk in root dir and return a set of all path in that dir
        based on repository walk function

        :param repo: repository to walk, its tip changeset is used
        :returns: set of file paths joined with ``repo.path``, it holds what
            was collected so far when the repository raises
            ``RepositoryError``
        """
        index_paths_ = set()
        try:
            tip = repo.get_changeset('tip')
            # walk gives the files of every visited dir, so going over
            # `files` once per step already covers all of them
            for topnode, dirs, files in tip.walk('/'):
                for f in files:
                    index_paths_.add(jn(repo.path, f.path))

        except RepositoryError:
            log.debug(traceback.format_exc())
        return index_paths_

    def get_node(self, repo, path):
        """Return node for the given full path

        :param repo: repository the path belongs to
        :param path: path starting with ``repo.path``
        """
        # cut the repository path and the separator that follows it
        n_path = path[len(repo.path) + 1:]
        node = repo.get_changeset().get_node(n_path)
        return node

    def get_node_mtime(self, node):
        """Return date of the last changeset of the node as a timestamp"""
        return mktime(node.last_changeset.date.timetuple())

    def add_doc(self, writer, path, repo):
        """Adding doc to writer this function itself fetches data from
        the instance of vcs backend

        :param writer: index writer the document is added to
        :param path: full path of the file, as returned by ``get_paths``
        :param repo: repository the file belongs to
        """
        node = self.get_node(repo, path)

        #we just index the content of chosen files, and skip binary files
        if node.extension in INDEX_EXTENSIONS and not node.is_binary:

            u_content = node.content
            if not isinstance(u_content, unicode):
                log.warning('    >> %s Could not get this content as unicode '
                            'replacing with empty content', path)
                u_content = u''
            else:
                log.debug('    >> %s [WITH CONTENT]', path)

        else:
            log.debug('    >> %s', path)
            #just index file name without it's content
            u_content = u''

        # owner goes through safe_unicode like the other fields, a plain
        # unicode() call fails on non ascii byte strings
        writer.add_document(owner=safe_unicode(repo.contact),
                            repository=safe_unicode(repo.name),
                            path=safe_unicode(path),
                            content=u_content,
                            modtime=self.get_node_mtime(node),
                            extension=node.extension)

    def build_index(self):
        """Remove previous index and build a new one from all repositories"""
        if os.path.exists(self.index_location):
            log.debug('removing previous index')
            rmtree(self.index_location)

        if not os.path.exists(self.index_location):
            os.mkdir(self.index_location)

        # NOTE(review): the index is created as IDX_NAME while update_index
        # opens self.indexname - confirm they are meant to be the same
        idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
        writer = idx.writer()

        for repo in self.repo_paths.values():
            log.debug('building index @ %s', repo.path)

            for idx_path in self.get_paths(repo):
                self.add_doc(writer, idx_path, repo)

        log.debug('>> COMMITING CHANGES <<')
        writer.commit(merge=True)
        log.debug('>>> FINISHED BUILDING INDEX <<<')

    def update_index(self):
        """Incremental update: drop deleted files from the index, reindex
        files changed since they were indexed and add files not indexed yet
        """
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')

        idx = open_dir(self.index_location, indexname=self.indexname)
        # The set of all paths in the index
        indexed_paths = set()
        # The set of all paths we need to re-index
        to_index = set()

        reader = idx.reader()
        writer = idx.writer()

        # Loop over the stored fields in the index
        for fields in reader.all_stored_fields():
            indexed_path = fields['path']
            indexed_paths.add(indexed_path)

            try:
                repo = self.repo_paths[fields['repository']]
            except KeyError:
                # repository is not among the scanned ones (filtered out by
                # repo_list or gone), leave its documents as they are
                log.debug('skipping %s, repository %s is not scanned',
                          indexed_path, fields['repository'])
                continue

            try:
                node = self.get_node(repo, indexed_path)
            except ChangesetError:
                # This file was deleted since it was indexed
                log.debug('removing from index %s', indexed_path)
                writer.delete_by_term('path', indexed_path)

            else:
                # Check if this file was changed since it was indexed
                indexed_time = fields['modtime']
                mtime = self.get_node_mtime(node)
                if mtime > indexed_time:
                    # The file has changed, delete it and add it to the list of
                    # files to reindex
                    log.debug('adding to reindex list %s', indexed_path)
                    writer.delete_by_term('path', indexed_path)
                    to_index.add(indexed_path)

        # Loop over the files in the filesystem
        # Assume we have a function that gathers the filenames of the
        # documents to be indexed
        for repo in self.repo_paths.values():
            for path in self.get_paths(repo):
                if path in to_index or path not in indexed_paths:
                    # This is either a file that's changed, or a new file
                    # that wasn't indexed before. So index it!
                    self.add_doc(writer, path, repo)
                    log.debug('re indexing %s', path)

        log.debug('>> COMMITING CHANGES <<')
        writer.commit(merge=True)
        log.debug('>>> FINISHED REBUILDING INDEX <<<')

    def run(self, full_index=False):
        """Run daemon

        :param full_index: force the full build, it is also done when the
            index location had to be created in ``__init__``
        """
        if full_index or self.initial:
            self.build_index()
        else:
            self.update_index()
General Comments 0
You need to be logged in to leave comments. Login now