##// END OF EJS Templates
added safe unicode function, and implemented it in whoosh indexer
marcink -
r443:e5157e2a default
parent child Browse files
Show More
@@ -1,338 +1,354
1 1 """Helper functions
2 2
3 3 Consists of functions to typically be used within templates, but also
4 4 available to Controllers. This module is available to both as 'h'.
5 5 """
6 6 from pygments.formatters import HtmlFormatter
7 7 from pygments import highlight as code_highlight
8 8 from pylons import url, app_globals as g
9 9 from pylons.i18n.translation import _, ungettext
10 10 from vcs.utils.annotate import annotate_highlight
11 11 from webhelpers.html import literal, HTML, escape
12 12 from webhelpers.html.tools import *
13 13 from webhelpers.html.builder import make_tag
14 14 from webhelpers.html.tags import auto_discovery_link, checkbox, css_classes, \
15 15 end_form, file, form, hidden, image, javascript_link, link_to, link_to_if, \
16 16 link_to_unless, ol, required_legend, select, stylesheet_link, submit, text, \
17 17 password, textarea, title, ul, xml_declaration, radio
18 18 from webhelpers.html.tools import auto_link, button_to, highlight, js_obfuscate, \
19 19 mail_to, strip_links, strip_tags, tag_re
20 20 from webhelpers.number import format_byte_size, format_bit_size
21 21 from webhelpers.pylonslib import Flash as _Flash
22 22 from webhelpers.pylonslib.secure_form import secure_form
23 23 from webhelpers.text import chop_at, collapse, convert_accented_entities, \
24 24 convert_misc_entities, lchop, plural, rchop, remove_formatting, \
25 25 replace_whitespace, urlify, truncate, wrap_paragraphs
26 26
27 27 #Custom helpers here :)
class _Link(object):
    """
    Callable helper rendering an HTML link to a routed url.

    The address is generated with ``url(*url_, **urlargs)`` and wrapped in
    an anchor by ``link_to``.

    @param label: text of the link; when it is empty or None the generated
        address itself is used as the text
    @param url_: positional arguments passed on to ``url``
    @param urlargs: keyword arguments passed on to ``url``
    """

    def __call__(self, label='', *url_, **urlargs):
        target = url(*url_, **urlargs)
        # The previous check `label is None or ''` only ever matched None
        # (the `or ''` operand is always false) and the fallback assigned
        # the `url` generator object itself rather than the built address.
        if not label:
            label = target
        return link_to(label, target)


link = _Link()
42 42
class _GetError(object):
    """
    Callable helper rendering the validation error of a single form field.

    @param field_name: name of the field to look up
    @param form_errors: mapping of field name to error message, may be
        empty or None
    @return: the message wrapped in a ``<span class="error_msg">`` literal,
        or None when there is no error for that field
    """

    def __call__(self, field_name, form_errors):
        # `in` replaces the deprecated dict.has_key()
        if not form_errors or field_name not in form_errors:
            return None
        tmpl = """<span class="error_msg">%s</span>"""
        return literal(tmpl % form_errors.get(field_name))


get_error = _GetError()
51 51
def recursive_replace(str, replace=' '):
    """
    Collapse every run of the given sign into a single instance of it.

    @param str: given string
    @param replace: char to find and replace multiple instances
    @return: the string with no two adjacent occurrences of `replace`

    Examples::
    >>> recursive_replace("Mighty---Mighty-Bo--sstones",'-')
    'Mighty-Mighty-Bo-sstones'
    """
    # An empty sign can never be collapsed: '' * 2 is found in every string,
    # which used to recurse until the interpreter gave up.
    if not replace:
        return str

    doubled = replace * 2
    collapsed = str
    while doubled in collapsed:
        collapsed = collapsed.replace(doubled, replace)
    return collapsed
68 68
class _ToolTip(object):
    """
    Tooltip helper: calling the instance formats a tooltip text, while
    ``activate`` returns the YUI javascript that turns the marked elements
    into tooltips.
    """

    def __call__(self, tooltip_title, trim_at=50):
        """
        Special function just to wrap our text into nice formatted
        autowrapped text

        @param tooltip_title: text to show in the tooltip
        @param trim_at: width passed to ``wrap_paragraphs``
        @return: literal with newlines replaced by ``<br/>``
        """

        return literal(wrap_paragraphs(tooltip_title, trim_at)\
                       .replace('\n', '<br/>'))

    def activate(self):
        """
        Adds tooltip mechanism to the given Html all tooltips have to have
        set class tooltip and set attribute tooltip_title.
        Then a tooltip will be generated based on that
        All with yui js tooltip

        @return: literal holding the javascript snippet
        """

        # NOTE: the config object below must not end with a trailing comma,
        # older Internet Explorer versions reject such object literals.
        js = '''
        YAHOO.util.Event.onDOMReady(function(){
            function toolTipsId(){
                var ids = [];
                var tts = YAHOO.util.Dom.getElementsByClassName('tooltip');

                for (var i = 0; i < tts.length; i++) {
                    //if element doesn not have and id autgenerate one for tooltip

                    if (!tts[i].id){
                        tts[i].id='tt'+i*100;
                    }
                    ids.push(tts[i].id);
                }
                return ids
            };
            var myToolTips = new YAHOO.widget.Tooltip("tooltip", {
                context: toolTipsId(),
                monitorresize:false,
                xyoffset :[0,0],
                autodismissdelay:300000,
                hidedelay:5,
                showdelay:20
            });

            //Mouse Over event disabled for new repositories since they dont
            //have last commit message
            myToolTips.contextMouseOverEvent.subscribe(
                function(type, args) {
                    var context = args[0];
                    var txt = context.getAttribute('tooltip_title');
                    if(txt){
                        return true;
                    }
                    else{
                        return false;
                    }
                });


            // Set the text for the tooltip just before we display it. Lazy method
            myToolTips.contextTriggerEvent.subscribe(
                 function(type, args) {


                        var context = args[0];

                        var txt = context.getAttribute('tooltip_title');
                        this.cfg.setProperty("text", txt);


                        // positioning of tooltip
                        var tt_w = this.element.clientWidth;
                        var tt_h = this.element.clientHeight;

                        var context_w = context.offsetWidth;
                        var context_h = context.offsetHeight;

                        var pos_x = YAHOO.util.Dom.getX(context);
                        var pos_y = YAHOO.util.Dom.getY(context);

                        var display_strategy = 'top';
                        var xy_pos = [0,0];
                        switch (display_strategy){

                            case 'top':
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                var cur_y = pos_y-tt_h-4;
                                xy_pos = [cur_x,cur_y];
                                break;
                            case 'bottom':
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                var cur_y = pos_y+context_h+4;
                                xy_pos = [cur_x,cur_y];
                                break;
                            case 'left':
                                var cur_x = (pos_x-tt_w-4);
                                var cur_y = pos_y-((tt_h/2)-context_h/2);
                                xy_pos = [cur_x,cur_y];
                                break;
                            case 'right':
                                var cur_x = (pos_x+context_w+4);
                                var cur_y = pos_y-((tt_h/2)-context_h/2);
                                xy_pos = [cur_x,cur_y];
                                break;
                             default:
                                var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                var cur_y = pos_y-tt_h-4;
                                xy_pos = [cur_x,cur_y];
                                break;

                        }

                        this.cfg.setProperty("xy",xy_pos);

                  });

            //Mouse out
            myToolTips.contextMouseOutEvent.subscribe(
                function(type, args) {
                });
        });
        '''
        return literal(js)


tooltip = _ToolTip()
197 197
class _FilesBreadCrumbs(object):
    """
    Callable helper building the " / " separated breadcrumb links shown
    above the file browser.

    @param repo_name: name of the repository, also the text of first link
    @param rev: revision the links point to
    @param paths: slash separated path inside the repository
    @return: literal with one ``files_home`` link per path segment
    """

    def __call__(self, repo_name, rev, paths):
        def crumb(text, f_path):
            # every crumb leads to the file browser of the same revision
            return link_to(text, url('files_home',
                                     repo_name=repo_name,
                                     revision=rev, f_path=f_path))

        segments = paths.split('/')
        crumbs = [crumb(repo_name, '')]
        # empty segments (leading, trailing or doubled slashes) get no link,
        # but still count when the cumulative path is sliced
        crumbs.extend(crumb(segment, '/'.join(segments[:depth + 1]))
                      for depth, segment in enumerate(segments)
                      if segment != '')

        return literal(' / '.join(crumbs))


files_breadcrumbs = _FilesBreadCrumbs()
216 216
def pygmentize(filenode, **kwargs):
    """
    Highlight the content of a file node with pygments.

    @param filenode: node providing ``content`` and ``lexer``
    @param kwargs: options passed on to ``HtmlFormatter``
    @return: highlighted html as literal
    """
    source = filenode.content
    lexer = filenode.lexer
    formatter = HtmlFormatter(**kwargs)
    return literal(code_highlight(source, lexer, formatter))
224 224
def pygmentize_annotation(filenode, **kwargs):
    """
    pygmentize function for annotation

    Every annotated line gets a link to its changeset; each changeset is
    given its own colour and a tooltip with author, date and message.

    @param filenode: file node passed to ``annotate_highlight``
    @param kwargs: extra options passed to ``annotate_highlight``
    @return: annotated and highlighted html as literal
    """
    import colorsys

    # raw_id -> rgb components, so a changeset keeps one colour in the file
    color_dict = {}

    def gen_color():
        """endless generator of evenly distributed colors using hsv color
        and golden ratio; always yields the same sequence.
        """
        golden_ratio = 0.618033988749895
        h = 0.22717784590367374
        # no upper bound: files touched by a huge number of changesets
        # must not exhaust the generator
        while True:
            h = (h + golden_ratio) % 1
            rgb = colorsys.hsv_to_rgb(h, 0.95, 0.95)
            yield [str(int(x * 256)) for x in rgb]

    cgenerator = gen_color()

    def get_color_string(cs):
        if cs not in color_dict:
            color_dict[cs] = next(cgenerator)
        return "color: rgb(%s)! important;" % (', '.join(color_dict[cs]))

    def url_func(changeset):
        tooltip_html = "<div style='font-size:0.8em'><b>Author:</b>" + \
            " %s<br/><b>Date:</b> %s<br/><b>Message:</b> %s<br/></div>"

        # The tooltip text is rendered as html, so the author has to be
        # escaped - otherwise the "<mail@host>" part is taken for a tag.
        tooltip_html = tooltip_html % (escape(changeset.author),
                                       changeset.date,
                                       tooltip(changeset.message))
        lnk_format = 'r%-5s:%s' % (changeset.revision,
                                   changeset.raw_id)
        uri = link_to(
            lnk_format,
            url('changeset_home', repo_name=changeset.repository.name,
                revision=changeset.raw_id),
            style=get_color_string(changeset.raw_id),
            class_='tooltip',
            tooltip_title=tooltip_html
        )

        uri += '\n'
        return uri

    return literal(annotate_highlight(filenode, url_func, **kwargs))
278 278
def repo_name_slug(value):
    """
    Return slug of name of repository

    The name goes through ``urlify``, then every special sign is turned
    into a dash and runs of dashes are collapsed into one.
    """
    forbidden = '=[]\\;\'"<>,/~!@#$%^&*()+{}|:'
    slug = urlify(value)
    for sign in forbidden:
        slug = slug.replace(sign, '-')
    return recursive_replace(slug, '-')


flash = _Flash()
290 290
291 291
292 292 #===============================================================================
293 293 # MERCURIAL FILTERS available via h.
294 294 #===============================================================================
295 295 from mercurial import util
296 296 from mercurial.templatefilters import age as _age, person as _person
297 297
def age(x):
    """Return the result of mercurial's ``age`` template filter for x."""
    return _age(x)


def capitalize(x):
    """Return ``x.capitalize()``."""
    return x.capitalize()


def date(x):
    """Format x with ``util.datestr`` using its default format."""
    return util.datestr(x)


# address part extraction is mercurial's own function
email = util.email


def email_or_none(x):
    """Return ``util.email(x)``, or None when that leaves x unchanged."""
    address = util.email(x)
    if address != x:
        return address
    return None


def person(x):
    """Return the result of mercurial's ``person`` template filter for x."""
    return _person(x)


def hgdate(x):
    """Format the two numbers of x as "%d %d"."""
    return "%d %d" % x


def isodate(x):
    """Format x with ``util.datestr`` as '%Y-%m-%d %H:%M %1%2'."""
    return util.datestr(x, '%Y-%m-%d %H:%M %1%2')


def isodatesec(x):
    """Format x with ``util.datestr`` as '%Y-%m-%d %H:%M:%S %1%2'."""
    return util.datestr(x, '%Y-%m-%d %H:%M:%S %1%2')


def localdate(x):
    """Pair the first item of x with the second item of
    ``util.makedate()``."""
    return (x[0], util.makedate()[1])


def rfc822date(x):
    """Format x with ``util.datestr`` as "%a, %d %b %Y %H:%M:%S %1%2"."""
    return util.datestr(x, "%a, %d %b %Y %H:%M:%S %1%2")


def rfc3339date(x):
    """Format x with ``util.datestr`` as "%Y-%m-%dT%H:%M:%S%1:%2"."""
    return util.datestr(x, "%Y-%m-%dT%H:%M:%S%1:%2")


def time_ago(x):
    """Format the result of the ``age`` filter with ``util.datestr``."""
    # NOTE(review): the output of _age(x) is fed to util.datestr, which the
    # other filters call with the date itself - confirm this is intended
    return util.datestr(_age(x), "%a, %d %b %Y %H:%M:%S %1%2")
311 311
312 312
313 313 #===============================================================================
314 314 # PERMS
315 315 #===============================================================================
316 316 from pylons_app.lib.auth import HasPermissionAny, HasPermissionAll, \
317 317 HasRepoPermissionAny, HasRepoPermissionAll
318 318
319 319 #===============================================================================
320 320 # GRAVATAR URL
321 321 #===============================================================================
322 322 import hashlib
323 323 import urllib
324 324 from pylons import request
325 325
def gravatar_url(email_address, size=30):
    """
    Build the gravatar image url for the given email address.

    The secure gravatar host is used when the request environ carries
    ``HTTP_X_URL_SCHEME`` set to ``https``.

    @param email_address: address of the user, may be empty or None
    @param size: value of the ``s`` (size) query parameter
    @return: url of the avatar, with ``identicon`` as the default image
    """
    ssl_enabled = 'https' == request.environ.get('HTTP_X_URL_SCHEME')
    default = 'identicon'
    baseurl_nossl = "http://www.gravatar.com/avatar/"
    baseurl_ssl = "https://secure.gravatar.com/avatar/"
    baseurl = baseurl_ssl if ssl_enabled else baseurl_nossl

    # gravatar hashes the trimmed, lower-cased address; a missing address
    # is hashed as an empty one instead of raising
    normalized = (email_address or '').strip().lower()
    if not isinstance(normalized, bytes):
        # md5 needs bytes, a unicode address may hold non-ascii signs
        normalized = normalized.encode('utf-8')

    # construct the url
    query = urllib.urlencode({'d': default, 's': str(size)})
    return baseurl + hashlib.md5(normalized).hexdigest() + "?" + query
339
def safe_unicode(str):
    """safe unicode function. Unicode input is returned as it is, byte
    strings are decoded as utf-8 with undecodable bytes replaced, any other
    object is converted with its unicode representation.

    @param str: value to convert
    @return: unicode version of the value, never raises UnicodeDecodeError
    """
    text_type = type(u'')
    # the parameter hides the `str` builtin, so the builtin must not be
    # called in here; `bytes` is used for byte strings instead
    value = str

    if isinstance(value, text_type):
        return value

    if isinstance(value, bytes):
        # gives the same result as a strict ascii decode for plain ascii,
        # and cannot fail for anything else
        return value.decode('utf-8', 'replace')

    try:
        return text_type(value)
    except UnicodeDecodeError:
        # object whose string representation holds non-ascii bytes
        return bytes(value).decode('utf-8', 'replace')
@@ -1,199 +1,196
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 # whoosh indexer daemon for hg-app
4 4 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
5 5 #
6 6 # This program is free software; you can redistribute it and/or
7 7 # modify it under the terms of the GNU General Public License
8 8 # as published by the Free Software Foundation; version 2
9 9 # of the License or (at your option) any later version of the license.
10 10 #
11 11 # This program is distributed in the hope that it will be useful,
12 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 14 # GNU General Public License for more details.
15 15 #
16 16 # You should have received a copy of the GNU General Public License
17 17 # along with this program; if not, write to the Free Software
18 18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 19 # MA 02110-1301, USA.
20 20 """
21 21 Created on Jan 26, 2010
22 22
23 23 @author: marcink
24 24 A daemon will read from task table and run tasks
25 25 """
26 26 import sys
27 27 import os
28 28 from os.path import dirname as dn
29 29 from os.path import join as jn
30 30
31 31 #to get the pylons_app import
32 32 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
33 33 sys.path.append(project_path)
34 34
35 35 from pidlock import LockHeld, DaemonLock
36 36 import traceback
37 37 from pylons_app.config.environment import load_environment
38 38 from pylons_app.model.hg_model import HgModel
39 from pylons_app.lib.helpers import safe_unicode
39 40 from whoosh.index import create_in, open_dir
40 41 from shutil import rmtree
41 42 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
42 43 SCHEMA, IDX_NAME
43 44
44 45 import logging
45 46 import logging.config
46 47 logging.config.fileConfig(jn(project_path, 'development.ini'))
47 48 log = logging.getLogger('whooshIndexer')
48 49
def scan_paths(root_location):
    """Return what ``HgModel.repo_scan`` finds for the given location."""
    scanned = HgModel.repo_scan('/', root_location, None, True)
    return scanned
51 52
class WhooshIndexingDaemon(object):
    """Daemon for atomic jobs: builds or updates the whoosh index of the
    files found in the scanned repositories."""

    def __init__(self, indexname='HG_INDEX', repo_location=None):
        """
        @param indexname: name of the index opened by ``update_index``
        @param repo_location: location handed to ``scan_paths``
        """
        # NOTE(review): build_index creates the index under IDX_NAME while
        # update_index opens self.indexname - confirm both names agree
        self.indexname = indexname
        self.repo_location = repo_location

    def get_paths(self, root_dir):
        """recursive walk in root dir and return a set of all path in that
        dir excluding files in .hg dir"""
        index_paths_ = set()
        for path, dirs, files in os.walk(root_dir):
            # prune the walk in place; a plain substring test would also
            # drop every path that merely contains ".hg" in its name
            dirs[:] = [d for d in dirs if d != '.hg']
            index_paths_.update(os.path.join(path, f) for f in files)

        return index_paths_

    def add_doc(self, writer, path, repo):
        """Adding doc to writer

        Files with an extension listed in INDEX_EXTENSIONS are indexed with
        their content, all the others by name only. Paths that do not exist
        (e.g. broken symlinks) are logged and skipped.

        @param writer: index writer the document is added to
        @param path: path of the file to index
        @param repo: repository the file belongs to, provides ``contact``
            and ``name``
        """
        import errno

        ext = unicode(path.split('/')[-1].split('.')[-1].lower())

        try:
            # checked first, so a missing file is skipped before it is opened
            modtime = os.path.getmtime(path)

            #we just index the content of chosen files
            if ext in INDEX_EXTENSIONS:
                log.debug(' >> %s [WITH CONTENT]' % path)
                with open(path, 'rb') as fobj:
                    u_content = safe_unicode(fobj.read())
            else:
                log.debug(' >> %s' % path)
                #just index file name without its content
                u_content = u''
        except EnvironmentError as e:
            # covers both OSError (stat) and IOError (open/read)
            if e.errno == errno.ENOENT:
                log.debug('path %s does not exist or is a broken symlink' % path)
                return
            # bare raise keeps the original traceback
            raise

        writer.add_document(owner=unicode(repo.contact),
                            repository=u"%s" % repo.name,
                            path=u"%s" % path,
                            content=u_content,
                            modtime=modtime,
                            extension=ext)

    def build_index(self):
        """Remove any previous index and index every file of every scanned
        repository from scratch."""
        if os.path.exists(IDX_LOCATION):
            log.debug('removing previos index')
            rmtree(IDX_LOCATION)

        if not os.path.exists(IDX_LOCATION):
            os.mkdir(IDX_LOCATION)

        idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
        writer = idx.writer()

        for repo in scan_paths(self.repo_location).values():
            log.debug('building index @ %s' % repo.path)

            for idx_path in self.get_paths(repo.path):
                self.add_doc(writer, idx_path, repo)
        writer.commit(merge=True)

        log.debug('>>> FINISHED BUILDING INDEX <<<')

    def update_index(self):
        """Incremental update: drop documents of deleted files, reindex
        files modified since they were indexed and add the new ones."""
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')

        idx = open_dir(IDX_LOCATION, indexname=self.indexname)
        # The set of all paths in the index
        indexed_paths = set()
        # The set of all paths we need to re-index
        to_index = set()

        reader = idx.reader()
        writer = idx.writer()

        # Loop over the stored fields in the index
        for fields in reader.all_stored_fields():
            indexed_path = fields['path']
            indexed_paths.add(indexed_path)

            if not os.path.exists(indexed_path):
                # This file was deleted since it was indexed
                log.debug('removing from index %s' % indexed_path)
                writer.delete_by_term('path', indexed_path)

            else:
                # Check if this file was changed since it
                # was indexed
                indexed_time = fields['modtime']

                mtime = os.path.getmtime(indexed_path)

                if mtime > indexed_time:

                    # The file has changed, delete it and add it to the
                    # list of files to reindex
                    log.debug('adding to reindex list %s' % indexed_path)
                    writer.delete_by_term('path', indexed_path)
                    to_index.add(indexed_path)

        # Loop over the files in the filesystem
        for repo in scan_paths(self.repo_location).values():
            for path in self.get_paths(repo.path):
                if path in to_index or path not in indexed_paths:
                    # This is either a file that's changed, or a new file
                    # that wasn't indexed before. So index it!
                    self.add_doc(writer, path, repo)
                    log.debug('reindexing %s' % path)

        writer.commit(merge=True)
        log.debug('>>> FINISHED <<<')

    def run(self, full_index=False):
        """Run daemon

        @param full_index: rebuild the whole index when True, otherwise
            only look for changes
        """
        if full_index:
            self.build_index()
        else:
            self.update_index()
188 185
if __name__ == "__main__":
    # Script entry point: run one indexing pass while holding the daemon
    # lock; exit with status 1 when the lock is already held.
    repo_location = '/home/marcink/hg_repos/*'
    full_index = True  # False means looking just for changes
    try:
        lock = DaemonLock()
        try:
            WhooshIndexingDaemon(repo_location=repo_location)\
                .run(full_index=full_index)
        finally:
            # release the lock also when the indexing run fails
            lock.release()
    except LockHeld:
        sys.exit(1)
General Comments 0
You need to be logged in to leave comments. Login now