##// END OF EJS Templates
added safe unicode function, and implemented it in whoosh indexer
marcink -
r443:e5157e2a default
parent child Browse files
Show More
@@ -1,338 +1,354
1 """Helper functions
1 """Helper functions
2
2
3 Consists of functions to typically be used within templates, but also
3 Consists of functions to typically be used within templates, but also
4 available to Controllers. This module is available to both as 'h'.
4 available to Controllers. This module is available to both as 'h'.
5 """
5 """
6 from pygments.formatters import HtmlFormatter
6 from pygments.formatters import HtmlFormatter
7 from pygments import highlight as code_highlight
7 from pygments import highlight as code_highlight
8 from pylons import url, app_globals as g
8 from pylons import url, app_globals as g
9 from pylons.i18n.translation import _, ungettext
9 from pylons.i18n.translation import _, ungettext
10 from vcs.utils.annotate import annotate_highlight
10 from vcs.utils.annotate import annotate_highlight
11 from webhelpers.html import literal, HTML, escape
11 from webhelpers.html import literal, HTML, escape
12 from webhelpers.html.tools import *
12 from webhelpers.html.tools import *
13 from webhelpers.html.builder import make_tag
13 from webhelpers.html.builder import make_tag
14 from webhelpers.html.tags import auto_discovery_link, checkbox, css_classes, \
14 from webhelpers.html.tags import auto_discovery_link, checkbox, css_classes, \
15 end_form, file, form, hidden, image, javascript_link, link_to, link_to_if, \
15 end_form, file, form, hidden, image, javascript_link, link_to, link_to_if, \
16 link_to_unless, ol, required_legend, select, stylesheet_link, submit, text, \
16 link_to_unless, ol, required_legend, select, stylesheet_link, submit, text, \
17 password, textarea, title, ul, xml_declaration, radio
17 password, textarea, title, ul, xml_declaration, radio
18 from webhelpers.html.tools import auto_link, button_to, highlight, js_obfuscate, \
18 from webhelpers.html.tools import auto_link, button_to, highlight, js_obfuscate, \
19 mail_to, strip_links, strip_tags, tag_re
19 mail_to, strip_links, strip_tags, tag_re
20 from webhelpers.number import format_byte_size, format_bit_size
20 from webhelpers.number import format_byte_size, format_bit_size
21 from webhelpers.pylonslib import Flash as _Flash
21 from webhelpers.pylonslib import Flash as _Flash
22 from webhelpers.pylonslib.secure_form import secure_form
22 from webhelpers.pylonslib.secure_form import secure_form
23 from webhelpers.text import chop_at, collapse, convert_accented_entities, \
23 from webhelpers.text import chop_at, collapse, convert_accented_entities, \
24 convert_misc_entities, lchop, plural, rchop, remove_formatting, \
24 convert_misc_entities, lchop, plural, rchop, remove_formatting, \
25 replace_whitespace, urlify, truncate, wrap_paragraphs
25 replace_whitespace, urlify, truncate, wrap_paragraphs
26
26
27 #Custom helpers here :)
27 #Custom helpers here :)
class _Link(object):
    '''
    Make a link based on label and url with help of url

    @param label: name of link, if empty or None the generated url is used
    @param url_: positional arguments passed through to url
    @param urlargs: keyword arguments passed through to url
    '''

    def __call__(self, label='', *url_, **urlargs):
        # Generate the target once so it can double as the fallback label
        target = url(*url_, **urlargs)
        # The previous test ``label is None or ''`` never matched an empty
        # string, and for None it stored the ``url`` generator object itself
        # as the label instead of the generated url.
        if not label:
            label = target
        return link_to(label, target)

link = _Link()
42
42
class _GetError(object):
    '''
    Render the error message of a single form field

    @param field_name: key of the field inside form_errors
    @param form_errors: mapping of field names to messages, may be empty
        or None
    @return: literal with the message wrapped in an error_msg span, None
        when there is no message for the field
    '''

    def __call__(self, field_name, form_errors):
        # ``in`` replaces the deprecated dict.has_key()
        if not form_errors or field_name not in form_errors:
            return None
        tmpl = """<span class="error_msg">%s</span>"""
        return literal(tmpl % form_errors.get(field_name))

get_error = _GetError()
51
51
def recursive_replace(str, replace=' '):
    """
    Collapse every run of the given sign to just one instance

    @param str: given string
    @param replace: char to find and replace multiple instances, an empty
        value leaves the string untouched
    @return: string without two replace signs next to each other

    Examples::
    >>> recursive_replace("Mighty---Mighty-Bo--sstones",'-')
    'Mighty-Mighty-Bo-sstones'
    """

    # An empty sign is found at index 0 of every string, so the former
    # recursive implementation never terminated for it.
    if not replace:
        return str

    doubled = replace * 2
    # A single pass only halves a run, repeat until no pair is left
    while doubled in str:
        str = str.replace(doubled, replace)
    return str
68
68
class _ToolTip(object):
    """
    Helper giving the tooltip text and the yui js that displays tooltips
    """

    def __call__(self, tooltip_title, trim_at=50):
        """
        Special function just to wrap our text into nice formatted autowrapped
        text

        @param tooltip_title: text shown inside the tooltip
        @param trim_at: width handed to wrap_paragraphs
        @return: literal with every newline replaced by <br/>
        """

        # NOTE(review): the text is marked as literal without being escaped
        # here - confirm callers never hand in untrusted markup.
        wrapped = wrap_paragraphs(tooltip_title, trim_at)
        return literal(wrapped.replace('\n', '<br/>'))

    def activate(self):
        """
        Adds tooltip mechanism to the given Html all tooltips have to have
        set class tooltip and set attribute tooltip_title.
        Then a tooltip will be generated based on that
        All with yui js tooltip

        @return: literal with the javascript source
        """

        # The config object passed to YAHOO.widget.Tooltip must not end with
        # a trailing comma: older Internet Explorer versions reject such an
        # object literal as a syntax error.
        js = '''
        YAHOO.util.Event.onDOMReady(function(){
            function toolTipsId(){
                var ids = [];
                var tts = YAHOO.util.Dom.getElementsByClassName('tooltip');

                for (var i = 0; i < tts.length; i++) {
                    //if element doesn not have and id autgenerate one for tooltip

                    if (!tts[i].id){
                        tts[i].id='tt'+i*100;
                    }
                    ids.push(tts[i].id);
                }
                return ids
            };
            var myToolTips = new YAHOO.widget.Tooltip("tooltip", {
                context: toolTipsId(),
                monitorresize:false,
                xyoffset :[0,0],
                autodismissdelay:300000,
                hidedelay:5,
                showdelay:20
            });

            //Mouse Over event disabled for new repositories since they dont
            //have last commit message
            myToolTips.contextMouseOverEvent.subscribe(
                function(type, args) {
                    var context = args[0];
                    var txt = context.getAttribute('tooltip_title');
                    if(txt){
                        return true;
                    }
                    else{
                        return false;
                    }
                });


            // Set the text for the tooltip just before we display it. Lazy method
            myToolTips.contextTriggerEvent.subscribe(
                function(type, args) {


                    var context = args[0];

                    var txt = context.getAttribute('tooltip_title');
                    this.cfg.setProperty("text", txt);


                    // positioning of tooltip
                    var tt_w = this.element.clientWidth;
                    var tt_h = this.element.clientHeight;

                    var context_w = context.offsetWidth;
                    var context_h = context.offsetHeight;

                    var pos_x = YAHOO.util.Dom.getX(context);
                    var pos_y = YAHOO.util.Dom.getY(context);

                    var display_strategy = 'top';
                    var xy_pos = [0,0];
                    switch (display_strategy){

                        case 'top':
                            var cur_x = (pos_x+context_w/2)-(tt_w/2);
                            var cur_y = pos_y-tt_h-4;
                            xy_pos = [cur_x,cur_y];
                            break;
                        case 'bottom':
                            var cur_x = (pos_x+context_w/2)-(tt_w/2);
                            var cur_y = pos_y+context_h+4;
                            xy_pos = [cur_x,cur_y];
                            break;
                        case 'left':
                            var cur_x = (pos_x-tt_w-4);
                            var cur_y = pos_y-((tt_h/2)-context_h/2);
                            xy_pos = [cur_x,cur_y];
                            break;
                        case 'right':
                            var cur_x = (pos_x+context_w+4);
                            var cur_y = pos_y-((tt_h/2)-context_h/2);
                            xy_pos = [cur_x,cur_y];
                            break;
                        default:
                            var cur_x = (pos_x+context_w/2)-(tt_w/2);
                            var cur_y = pos_y-tt_h-4;
                            xy_pos = [cur_x,cur_y];
                            break;

                    }

                    this.cfg.setProperty("xy",xy_pos);

                });

            //Mouse out
            myToolTips.contextMouseOutEvent.subscribe(
                function(type, args) {
                    var context = args[0];

                });
        });
        '''
        return literal(js)

tooltip = _ToolTip()
197
197
class _FilesBreadCrumbs(object):
    """
    Render a path inside a repository as a chain of links

    @param repo_name: name of repository, label of the first crumb
    @param rev: revision every crumb links to
    @param paths: path inside the repository, separated by '/'
    @return: literal with all crumbs joined by ' / '
    """

    def __call__(self, repo_name, rev, paths):
        root_url = url('files_home', repo_name=repo_name, revision=rev,
                       f_path='')
        crumbs = [link_to(repo_name, root_url)]
        segments = paths.split('/')

        # Empty segments get no crumb of their own but still count towards
        # the position used to build the partial path
        for pos, segment in enumerate(segments):
            if segment == '':
                continue
            partial = '/'.join(segments[:pos + 1])
            crumb_url = url('files_home', repo_name=repo_name, revision=rev,
                            f_path=partial)
            crumbs.append(link_to(segment, crumb_url))

        return literal(' / '.join(crumbs))

files_breadcrumbs = _FilesBreadCrumbs()
216
216
def pygmentize(filenode, **kwargs):
    """
    Highlight the content of a file node using pygments

    @param filenode: node giving the content and the lexer to use
    @param kwargs: options passed to the HtmlFormatter
    @return: literal with the highlighted markup
    """
    source = filenode.content
    lexer = filenode.lexer
    formatter = HtmlFormatter(**kwargs)
    return literal(code_highlight(source, lexer, formatter))
224
224
def pygmentize_annotation(filenode, **kwargs):
    """
    pygmentize function for annotation

    @param filenode: file node handed to annotate_highlight
    @param kwargs: extra options passed through to annotate_highlight
    @return: literal with the annotated and highlighted markup
    """

    color_dict = {}

    def gen_color():
        """endless generator of evenly distributed colors using hsv color
        and golden ratio. Every color is a list of three strings holding
        the red, green and blue value.
        """
        import colorsys
        golden_ratio = 0.618033988749895
        h = 0.22717784590367374
        #generate nice web friendly colors in the same order, without an
        #upper limit so files touched by many changesets cannot exhaust it
        while True:
            h = (h + golden_ratio) % 1
            rgb = colorsys.hsv_to_rgb(h, 0.95, 0.95)
            yield [str(int(x * 256)) for x in rgb]

    cgenerator = gen_color()

    def get_color_string(cs):
        """return the css color rule of a changeset id, one color per id"""
        if cs not in color_dict:
            color_dict[cs] = next(cgenerator)
        return "color: rgb(%s)! important;" % (', '.join(color_dict[cs]))

    def url_func(changeset):
        """build the colored link with tooltip shown for one changeset"""
        tooltip_html = "<div style='font-size:0.8em'><b>Author:</b>" + \
            " %s<br/><b>Date:</b> %s<br/><b>Message:</b> %s<br/></div>"

        #author and message come from the repository and end up rendered
        #as html by the tooltip, so they are escaped before interpolation
        tooltip_html = tooltip_html % (escape(changeset.author),
                                       changeset.date,
                                       tooltip(unicode(escape(changeset.message))))
        lnk_format = 'r%-5s:%s' % (changeset.revision,
                                   changeset.raw_id)
        uri = link_to(
            lnk_format,
            url('changeset_home', repo_name=changeset.repository.name,
                revision=changeset.raw_id),
            style=get_color_string(changeset.raw_id),
            class_='tooltip',
            tooltip_title=tooltip_html
        )

        uri += '\n'
        return uri

    return literal(annotate_highlight(filenode, url_func, **kwargs))
278
278
def repo_name_slug(value):
    """
    Return slug of name of repository

    @param value: name of repository
    @return: urlified name where every sign from the list below became '-'
        and runs of '-' are collapsed to a single one
    """
    slug = urlify(value)
    #the backslash is spelled explicitly, the former literal relied on the
    #unknown escape sequence before ';' being kept as two characters
    for c in '=[]\\;\'"<>,/~!@#$%^&*()+{}|:':
        slug = slug.replace(c, '-')
    return recursive_replace(slug, '-')
288
288
289 flash = _Flash()
289 flash = _Flash()
290
290
291
291
292 #===============================================================================
292 #===============================================================================
293 # MERCURIAL FILTERS available via h.
293 # MERCURIAL FILTERS available via h.
294 #===============================================================================
294 #===============================================================================
295 from mercurial import util
295 from mercurial import util
296 from mercurial.templatefilters import age as _age, person as _person
296 from mercurial.templatefilters import age as _age, person as _person
297
297
def age(x):
    """hand the value to the age filter of mercurial"""
    return _age(x)


def capitalize(x):
    """return x.capitalize()"""
    return x.capitalize()


def date(x):
    """format the value with util.datestr using its default format"""
    return util.datestr(x)


email = util.email


def email_or_none(x):
    """return util.email(x), or None when that equals the given value"""
    return util.email(x) if util.email(x) != x else None


def person(x):
    """hand the value to the person filter of mercurial"""
    return _person(x)


def hgdate(x):
    """format a pair of numbers as two integers separated by a space"""
    return "%d %d" % x


def isodate(x):
    """format the value with util.datestr, minutes precision"""
    return util.datestr(x, '%Y-%m-%d %H:%M %1%2')


def isodatesec(x):
    """format the value with util.datestr, seconds precision"""
    return util.datestr(x, '%Y-%m-%d %H:%M:%S %1%2')


def localdate(x):
    """pair the first item of x with the second item of util.makedate()"""
    return (x[0], util.makedate()[1])


def rfc822date(x):
    """format the value with util.datestr in rfc822 layout"""
    return util.datestr(x, "%a, %d %b %Y %H:%M:%S %1%2")


def rfc3339date(x):
    """format the value with util.datestr in rfc3339 layout"""
    return util.datestr(x, "%Y-%m-%dT%H:%M:%S%1:%2")


def time_ago(x):
    """format the result of the age filter with util.datestr"""
    # NOTE(review): the result of _age() is fed into util.datestr() here -
    # confirm that _age returns a value datestr accepts.
    return util.datestr(_age(x), "%a, %d %b %Y %H:%M:%S %1%2")
311
311
312
312
313 #===============================================================================
313 #===============================================================================
314 # PERMS
314 # PERMS
315 #===============================================================================
315 #===============================================================================
316 from pylons_app.lib.auth import HasPermissionAny, HasPermissionAll, \
316 from pylons_app.lib.auth import HasPermissionAny, HasPermissionAll, \
317 HasRepoPermissionAny, HasRepoPermissionAll
317 HasRepoPermissionAny, HasRepoPermissionAll
318
318
319 #===============================================================================
319 #===============================================================================
320 # GRAVATAR URL
320 # GRAVATAR URL
321 #===============================================================================
321 #===============================================================================
322 import hashlib
322 import hashlib
323 import urllib
323 import urllib
324 from pylons import request
324 from pylons import request
325
325
def gravatar_url(email_address, size=30):
    """
    Build the gravatar url for an email address

    @param email_address: address to build the url for, None is treated
        like an empty address
    @param size: value sent as the 's' parameter of the url
    @return: url on the secure gravatar host when the HTTP_X_URL_SCHEME
        entry of the request environ is 'https', on the plain http host
        otherwise
    """
    ssl_enabled = 'https' == request.environ.get('HTTP_X_URL_SCHEME')
    default = 'identicon'
    baseurl_nossl = "http://www.gravatar.com/avatar/"
    baseurl_ssl = "https://secure.gravatar.com/avatar/"
    baseurl = baseurl_ssl if ssl_enabled else baseurl_nossl

    # gravatar hashes the trimmed and lower cased address, so surrounding
    # whitespace must not end up in the hash
    normalized = (email_address or '').strip().lower()

    # construct the url
    query = urllib.urlencode({'d': default, 's': str(size)})
    return baseurl + hashlib.md5(normalized).hexdigest() + "?" + query
339
def safe_unicode(str):
    """safe unicode function. In case of UnicodeDecode error we try to return
    unicode with errors replace, if this fails we return unicode with
    string_escape decoding

    @param str: byte string or object to convert
    @return: unicode representation of the given value
    """

    try:
        return unicode(str)
    except UnicodeDecodeError:
        pass

    try:
        return unicode(str, 'utf-8', 'replace')
    except (UnicodeDecodeError, TypeError):
        #decoding with a codec only works for strings and buffers, anything
        #else is represented as an escaped byte string. The parameter
        #shadows the ``str`` builtin, so it is reached through its ``bytes``
        #alias - calling ``str(str)`` here would call the argument itself.
        return unicode(bytes(str).encode('string_escape'))
@@ -1,199 +1,196
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 # whoosh indexer daemon for hg-app
3 # whoosh indexer daemon for hg-app
4 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
4 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
5 #
5 #
6 # This program is free software; you can redistribute it and/or
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; version 2
8 # as published by the Free Software Foundation; version 2
9 # of the License or (at your opinion) any later version of the license.
9 # of the License or (at your opinion) any later version of the license.
10 #
10 #
11 # This program is distributed in the hope that it will be useful,
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
14 # GNU General Public License for more details.
15 #
15 #
16 # You should have received a copy of the GNU General Public License
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 # MA 02110-1301, USA.
19 # MA 02110-1301, USA.
20 """
20 """
21 Created on Jan 26, 2010
21 Created on Jan 26, 2010
22
22
23 @author: marcink
23 @author: marcink
24 A deamon will read from task table and run tasks
24 A deamon will read from task table and run tasks
25 """
25 """
26 import sys
26 import sys
27 import os
27 import os
28 from os.path import dirname as dn
28 from os.path import dirname as dn
29 from os.path import join as jn
29 from os.path import join as jn
30
30
31 #to get the pylons_app import
31 #to get the pylons_app import
32 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
32 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
33 sys.path.append(project_path)
33 sys.path.append(project_path)
34
34
35 from pidlock import LockHeld, DaemonLock
35 from pidlock import LockHeld, DaemonLock
36 import traceback
36 import traceback
37 from pylons_app.config.environment import load_environment
37 from pylons_app.config.environment import load_environment
38 from pylons_app.model.hg_model import HgModel
38 from pylons_app.model.hg_model import HgModel
39 from pylons_app.lib.helpers import safe_unicode
39 from whoosh.index import create_in, open_dir
40 from whoosh.index import create_in, open_dir
40 from shutil import rmtree
41 from shutil import rmtree
41 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
42 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
42 SCHEMA, IDX_NAME
43 SCHEMA, IDX_NAME
43
44
44 import logging
45 import logging
45 import logging.config
46 import logging.config
46 logging.config.fileConfig(jn(project_path, 'development.ini'))
47 logging.config.fileConfig(jn(project_path, 'development.ini'))
47 log = logging.getLogger('whooshIndexer')
48 log = logging.getLogger('whooshIndexer')
48
49
def scan_paths(root_location):
    """
    Scan the given location for repositories

    @param root_location: location handed to HgModel.repo_scan
    @return: result of HgModel.repo_scan
    """
    found = HgModel.repo_scan('/', root_location, None, True)
    return found
51
52
class WhooshIndexingDaemon(object):
    """Daemon building or incrementally updating the whoosh index"""

    def __init__(self, indexname='HG_INDEX', repo_location=None):
        """
        @param indexname: name of the index opened by update_index
        @param repo_location: location scanned for repositories
        """
        self.indexname = indexname
        self.repo_location = repo_location

    def get_paths(self, root_dir):
        """recursive walk in root dir and return a set of all path in that dir
        excluding files in .hg dir

        @param root_dir: directory to walk
        @return: set of file paths
        """
        index_paths_ = set()
        for path, dirs, files in os.walk(root_dir):
            #prune in place, so the walk never descends into a .hg dir.
            #Matching the exact name keeps dirs that merely contain '.hg'
            #somewhere in their path.
            dirs[:] = [d for d in dirs if d != '.hg']
            for f in files:
                index_paths_.add(jn(path, f))

        return index_paths_

    def add_doc(self, writer, path, repo):
        """Adding doc to writer

        @param writer: index writer the document is added to
        @param path: path of the file to index
        @param repo: repository the file belongs to, its contact and name
            are stored with the document
        """
        import errno

        ext = unicode(path.split('/')[-1].split('.')[-1].lower())
        try:
            #we just index the content of choosen files
            if ext in INDEX_EXTENSIONS:
                log.debug(' >> %s [WITH CONTENT]' % path)
                #the with block closes the file even when reading fails
                with open(path, 'rb') as fobj:
                    u_content = safe_unicode(fobj.read())
            else:
                log.debug(' >> %s' % path)
                #just index file name without it's content
                u_content = u''
            modtime = os.path.getmtime(path)
        except EnvironmentError as e:
            #open() reports a missing file as IOError, getmtime() as
            #OSError, both are covered by EnvironmentError
            if e.errno == errno.ENOENT:
                log.debug('path %s does not exist or is a broken symlink' % path)
                return
            raise

        writer.add_document(owner=unicode(repo.contact),
                            repository=u"%s" % repo.name,
                            path=u"%s" % path,
                            content=u_content,
                            modtime=modtime,
                            extension=ext)

    def build_index(self):
        """Drop an existing index and build a new one for all repositories"""
        if os.path.exists(IDX_LOCATION):
            log.debug('removing previos index')
            rmtree(IDX_LOCATION)

        if not os.path.exists(IDX_LOCATION):
            os.mkdir(IDX_LOCATION)

        # NOTE(review): the index is created as IDX_NAME but update_index
        # opens self.indexname - confirm both always name the same index.
        idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
        writer = idx.writer()

        for repo in scan_paths(self.repo_location).values():
            log.debug('building index @ %s' % repo.path)

            for idx_path in self.get_paths(repo.path):
                self.add_doc(writer, idx_path, repo)
        writer.commit(merge=True)

        log.debug('>>> FINISHED BUILDING INDEX <<<')

    def update_index(self):
        """Remove deleted files from the index and reindex changed or new
        ones"""
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')

        idx = open_dir(IDX_LOCATION, indexname=self.indexname)
        # The set of all paths in the index
        indexed_paths = set()
        # The set of all paths we need to re-index
        to_index = set()

        reader = idx.reader()
        writer = idx.writer()

        # Loop over the stored fields in the index
        for fields in reader.all_stored_fields():
            indexed_path = fields['path']
            indexed_paths.add(indexed_path)

            if not os.path.exists(indexed_path):
                # This file was deleted since it was indexed
                log.debug('removing from index %s' % indexed_path)
                writer.delete_by_term('path', indexed_path)

            else:
                # Check if this file was changed since it
                # was indexed
                indexed_time = fields['modtime']

                mtime = os.path.getmtime(indexed_path)

                if mtime > indexed_time:

                    # The file has changed, delete it and add it to the list of
                    # files to reindex
                    log.debug('adding to reindex list %s' % indexed_path)
                    writer.delete_by_term('path', indexed_path)
                    to_index.add(indexed_path)

        # Loop over the files in the filesystem
        for repo in scan_paths(self.repo_location).values():
            for path in self.get_paths(repo.path):
                if path in to_index or path not in indexed_paths:
                    # This is either a file that's changed, or a new file
                    # that wasn't indexed before. So index it!
                    self.add_doc(writer, path, repo)
                    log.debug('reindexing %s' % path)

        writer.commit(merge=True)
        log.debug('>>> FINISHED <<<')

    def run(self, full_index=False):
        """Run daemon

        @param full_index: True rebuilds the whole index, False only
            updates it
        """
        if full_index:
            self.build_index()
        else:
            self.update_index()
188
185
if __name__ == "__main__":
    repo_location = '/home/marcink/hg_repos/*'
    full_index = True # False means looking just for changes
    try:
        l = DaemonLock()
    except LockHeld:
        #another instance is already running
        sys.exit(1)

    try:
        WhooshIndexingDaemon(repo_location=repo_location)\
            .run(full_index=full_index)
    finally:
        #release the lock also when indexing fails, a lock left behind
        #would keep the next run from starting
        l.release()
199
196
General Comments 0
You need to be logged in to leave comments. Login now