upstream/kallithea Commit - r443:e5157e2a

added safe unicode function, and implemented it in whoosh indexer

marcink -

r443:e5157e2a default

parent child

pylons_app/lib/helpers.py

0 +16 0

              """Helper functions
              Consists of functions to typically be used within templates, but also
              available to Controllers. This module is available to both as 'h'.
              """
              from pygments.formatters import HtmlFormatter
              from pygments import highlight as code_highlight
              from pylons import url, app_globals as g
              from pylons.i18n.translation import _, ungettext
              from vcs.utils.annotate import annotate_highlight
              from webhelpers.html import literal, HTML, escape
              from webhelpers.html.tools import *
              from webhelpers.html.builder import make_tag
              from webhelpers.html.tags import auto_discovery_link, checkbox, css_classes, \
                  end_form, file, form, hidden, image, javascript_link, link_to, link_to_if, \
                  link_to_unless, ol, required_legend, select, stylesheet_link, submit, text, \
                  password, textarea, title, ul, xml_declaration, radio
              from webhelpers.html.tools import auto_link, button_to, highlight, js_obfuscate, \
                  mail_to, strip_links, strip_tags, tag_re
              from webhelpers.number import format_byte_size, format_bit_size
              from webhelpers.pylonslib import Flash as _Flash
              from webhelpers.pylonslib.secure_form import secure_form
              from webhelpers.text import chop_at, collapse, convert_accented_entities, \
                  convert_misc_entities, lchop, plural, rchop, remove_formatting, \
                  replace_whitespace, urlify, truncate, wrap_paragraphs
              #Custom helpers here :)
              class _Link(object):
                  '''
                  Make a link based on label and a url generated with help of url
                  @param label: text of the link, if it is None or empty the generated
                      url is used as the text
                  @param url_: positional arguments passed to url
                  @param urlargs: keyword arguments passed to url
                  @return: result of link_to for the label and the generated url
                  '''
                  def __call__(self, label='', *url_, **urlargs):
                      href = url(*url_, **urlargs)
                      # The former test ``label is None or ''`` only matched None and
                      # then used the ``url`` generator object itself as the label
                      if label is None or label == '':
                          label = href
                      return link_to(label, href)
              link = _Link()
              class _GetError(object):
                  def __call__(self, field_name, form_errors):
                      tmpl = """<span class="error_msg">%s</span>"""
                      if form_errors and form_errors.has_key(field_name):
                          return literal(tmpl % form_errors.get(field_name))
              get_error = _GetError()
              def recursive_replace(str, replace=' '):
                  """
                  Collapse every run of the given sign to just one instance
                  @param str: given string
                  @param replace: sign to find, multiple adjacent instances are replaced
                      by a single one
                  @return: string without two adjacent instances of replace, the string
                      is returned unchanged when replace is empty
                  Examples::
                  >>> recursive_replace("Mighty---Mighty-Bo--sstones",'-')
                  'Mighty-Mighty-Bo-sstones'
                  """
                  # An empty sign is found at every position, so the former recursion
                  # never reached its stop condition for it
                  if not replace:
                      return str
                  doubled = replace * 2
                  # One pass only halves a run, so repeat until no doubled sign is left
                  while doubled in str:
                      str = str.replace(doubled, replace)
                  return str
              class _ToolTip(object):
                  """
                  Tooltip helper: calling the instance formats a tooltip text, activate()
                  returns the javascript that attaches YUI tooltips to the page.
                  """
                  def __call__(self, tooltip_title, trim_at=50):
                      """
                      Special function just to wrap our text into nice formatted autowrapped
                      text
                      @param tooltip_title: text of the tooltip
                      @param trim_at: width passed to wrap_paragraphs
                      @return: literal with the wrapped text where every newline is
                          replaced by <br/>
                      """
                      return literal(wrap_paragraphs(tooltip_title, trim_at)\
                                     .replace('\n', '<br/>'))
                  def activate(self):
                      """
                      Adds tooltip mechanism to the given Html all tooltips have to have
                      set class tooltip and set attribute tooltip_title.
                      Then a tooltip will be generated based on that
                      All with yui js tooltip
                      The script gives an id to every tooltip element that has none,
                      reads the text from the tooltip_title attribute right before the
                      tooltip is shown and places the tooltip above the element (the
                      display strategy is fixed to 'top'). Elements without a
                      tooltip_title make the mouse over handler return false.
                      @return: literal with the javascript source
                      """
                      js = '''
                      YAHOO.util.Event.onDOMReady(function(){
                          function toolTipsId(){
                              var ids = [];
                              var tts = YAHOO.util.Dom.getElementsByClassName('tooltip');
                              for (var i = 0; i < tts.length; i++) {
                                  //if element doesn not have and id autgenerate one for tooltip
                                  if (!tts[i].id){
                                      tts[i].id='tt'+i*100;
                                  }
                                  ids.push(tts[i].id);
                              }
                              return ids
                          };
                          var myToolTips = new YAHOO.widget.Tooltip("tooltip", {
                              context: toolTipsId(),
                              monitorresize:false,
                              xyoffset :[0,0],
                              autodismissdelay:300000,
                              hidedelay:5,
                              showdelay:20,
                          });
                          //Mouse Over event disabled for new repositories since they dont
                          //have last commit message
                          myToolTips.contextMouseOverEvent.subscribe(
                              function(type, args) {
                                  var context = args[0];
                                  var txt = context.getAttribute('tooltip_title');
                                  if(txt){
                                      return true;
                                  }
                                  else{
                                      return false;
                                  }
                              });
                          // Set the text for the tooltip just before we display it. Lazy method
                          myToolTips.contextTriggerEvent.subscribe(
                               function(type, args) {
                                      var context = args[0];
                                      var txt = context.getAttribute('tooltip_title');
                                      this.cfg.setProperty("text", txt);
                                      // positioning of tooltip
                                      var tt_w = this.element.clientWidth;
                                      var tt_h = this.element.clientHeight;
                                      var context_w = context.offsetWidth;
                                      var context_h = context.offsetHeight;
                                      var pos_x = YAHOO.util.Dom.getX(context);
                                      var pos_y = YAHOO.util.Dom.getY(context);
                                      var display_strategy = 'top';
                                      var xy_pos = [0,0];
                                      switch (display_strategy){
                                          case 'top':
                                              var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                              var cur_y = pos_y-tt_h-4;
                                              xy_pos = [cur_x,cur_y];
                                              break;
                                          case 'bottom':
                                              var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                              var cur_y = pos_y+context_h+4;
                                              xy_pos = [cur_x,cur_y];
                                              break;
                                          case 'left':
                                              var cur_x = (pos_x-tt_w-4);
                                              var cur_y = pos_y-((tt_h/2)-context_h/2);
                                              xy_pos = [cur_x,cur_y];
                                              break;
                                          case 'right':
                                              var cur_x = (pos_x+context_w+4);
                                              var cur_y = pos_y-((tt_h/2)-context_h/2);
                                              xy_pos = [cur_x,cur_y];
                                              break;
                                           default:
                                              var cur_x = (pos_x+context_w/2)-(tt_w/2);
                                              var cur_y = pos_y-tt_h-4;
                                              xy_pos = [cur_x,cur_y];
                                              break;
                                      }
                                      this.cfg.setProperty("xy",xy_pos);
                                });
                          //Mouse out
                          myToolTips.contextMouseOutEvent.subscribe(
                              function(type, args) {
                                  var context = args[0];
                              });
                      });
                      '''
                      return literal(js)
              tooltip = _ToolTip()
              class _FilesBreadCrumbs(object):
                  """
                  Build the breadcrumb navigation for a path inside of a repository
                  @param repo_name: name of repository, used as text of the first link
                  @param rev: revision put into every generated url
                  @param paths: path separated with '/', empty segments are skipped
                  @return: literal with the links joined with ' / '
                  """
                  def __call__(self, repo_name, rev, paths):
                      def crumb(text, f_path):
                          # single link to the files_home route for the given path
                          return link_to(text, url('files_home',
                                                   repo_name=repo_name,
                                                   revision=rev,
                                                   f_path=f_path))
                      crumbs = [crumb(repo_name, '')]
                      segments = paths.split('/')
                      # every segment links to the path made of all segments up to it
                      crumbs.extend(crumb(segment, '/'.join(segments[:depth]))
                                    for depth, segment in enumerate(segments, 1)
                                    if segment != '')
                      return literal(' / '.join(crumbs))
              files_breadcrumbs = _FilesBreadCrumbs()
              def pygmentize(filenode, **kwargs):
                  """
                  Highlight the content of a file node with pygments
                  @param filenode: node that provides the content and lexer attributes
                  @param kwargs: options passed to HtmlFormatter
                  @return: literal with the highlighted html
                  """
                  source = filenode.content
                  lexer = filenode.lexer
                  formatter = HtmlFormatter(**kwargs)
                  return literal(code_highlight(source, lexer, formatter))
              def pygmentize_annotation(filenode, **kwargs):
                  """
                  pygmentize function for annotation
                  Every annotated line gets a link to its changeset, links of the same
                  changeset share one color and carry a tooltip with author, date and
                  message.
                  @param filenode: node passed to annotate_highlight
                  @param kwargs: options passed to annotate_highlight
                  @return: literal with the result of annotate_highlight
                  """
                  color_dict = {}
                  def gen_color():
                      """endless generator of evenly distibuted colors using hsv color
                      and golden ratio. The colors come in the same order on every run.
                      """
                      import colorsys
                      golden_ratio = 0.618033988749895
                      h = 0.22717784590367374
                      # The former generator stopped after 10k colors, the next new
                      # changeset then failed with StopIteration
                      while True:
                          h += golden_ratio
                          h %= 1
                          RGB_tuple = colorsys.hsv_to_rgb(h, 0.95, 0.95)
                          yield [str(int(x * 256)) for x in RGB_tuple]
                  cgenerator = gen_color()
                  def get_color_string(cs):
                      # a changeset keeps the color it was given first
                      if cs not in color_dict:
                          color_dict[cs] = next(cgenerator)
                      return "color: rgb(%s)! important;" % (', '.join(color_dict[cs]))
                  def url_func(changeset):
                      # The former markup closed a second, never opened </b> after date
                      # NOTE(review): author and date are put into the tooltip markup
                      # unescaped - confirm how tooltip_title is rendered
                      tooltip_html = "<div style='font-size:0.8em'><b>Author:</b>"+\
                      " %s<br/><b>Date:</b> %s<br/><b>Message:</b> %s<br/></div>"
                      tooltip_html = tooltip_html % (changeset.author,
                                                             changeset.date,
                                                             tooltip(changeset.message))
                      lnk_format = 'r%-5s:%s' % (changeset.revision,
                                               changeset.raw_id)
                      uri = link_to(
                              lnk_format,
                              url('changeset_home', repo_name=changeset.repository.name,
                                  revision=changeset.raw_id),
                              style=get_color_string(changeset.raw_id),
                              class_='tooltip',
                              tooltip_title=tooltip_html
                            )
                      uri += '\n'
                      return uri
                  return literal(annotate_highlight(filenode, url_func, **kwargs))
              def repo_name_slug(value):
                  """
                  Return slug of name of repository
                  The name goes through urlify, then every sign from the list below is
                  turned into '-' and repeated '-' are collapsed to one.
                  @param value: name of repository
                  """
                  forbidden_signs = """=[]\;'"<>,/~!@#$%^&*()+{}|:"""
                  cleaned = urlify(value)
                  for sign in forbidden_signs:
                      cleaned = cleaned.replace(sign, '-')
                  return recursive_replace(cleaned, '-')
              flash = _Flash()
              #===============================================================================
              # MERCURIAL FILTERS available via h.
              #===============================================================================
              from mercurial import util
              from mercurial.templatefilters import age as _age, person as _person
              # thin wrappers around the mercurial template filters age and person
              age = lambda  x:_age(x)
              capitalize = lambda x: x.capitalize()
              # the functions below pass x to util.datestr, with the default or the
              # given format
              date = lambda x: util.datestr(x)
              email = util.email
              # None when util.email returns its input unchanged
              email_or_none = lambda x: util.email(x) if util.email(x) != x else None
              person = lambda x: _person(x)
              # x has to be a pair of numbers
              hgdate = lambda  x: "%d %d" % x
              isodate = lambda  x: util.datestr(x, '%Y-%m-%d %H:%M %1%2')
              isodatesec = lambda  x: util.datestr(x, '%Y-%m-%d %H:%M:%S %1%2')
              # keeps x[0] and takes the second value from util.makedate()
              localdate = lambda  x: (x[0], util.makedate()[1])
              rfc822date = lambda  x: util.datestr(x, "%a, %d %b %Y %H:%M:%S %1%2")
              rfc3339date = lambda  x: util.datestr(x, "%Y-%m-%dT%H:%M:%S%1:%2")
              # NOTE(review): this feeds the result of the age filter into util.datestr
              # while every other filter here passes x itself - looks like age returns
              # text and not a date, confirm that this works at all
              time_ago = lambda x: util.datestr(_age(x), "%a, %d %b %Y %H:%M:%S %1%2")
              #===============================================================================
              # PERMS
              #===============================================================================
              from pylons_app.lib.auth import HasPermissionAny, HasPermissionAll, \
              HasRepoPermissionAny, HasRepoPermissionAll
              #===============================================================================
              # GRAVATAR URL
              #===============================================================================
              import hashlib
              import urllib
              from pylons import request
              def gravatar_url(email_address, size=30):
                  """
                  Build the gravatar url for the given email address
                  The secure gravatar host is used when the request environ has
                  HTTP_X_URL_SCHEME set to https.
                  @param email_address: address of the user, None is treated like an
                      empty address
                  @param size: size of the image, sent as the s parameter
                  @return: url of the avatar with identicon as the default image
                  """
                  ssl_enabled = 'https' == request.environ.get('HTTP_X_URL_SCHEME')
                  default = 'identicon'
                  baseurl_nossl = "http://www.gravatar.com/avatar/"
                  baseurl_ssl = "https://secure.gravatar.com/avatar/"
                  baseurl = baseurl_ssl if ssl_enabled else baseurl_nossl
                  # gravatar hashes the trimmed and lower cased address, the former
                  # code skipped the trimming and failed on None
                  normalized = (email_address or '').strip().lower()
                  if isinstance(normalized, unicode):
                      # md5 works on bytes, a non ascii unicode address would fail
                      # with UnicodeEncodeError
                      normalized = normalized.encode('utf-8')
                  # construct the url
                  query = urllib.urlencode({'d':default, 's':str(size)})
                  return baseurl + hashlib.md5(normalized).hexdigest() + "?" + query
+             def safe_unicode(str):
+                 """safe unicode function. In case of UnicodeDecode error we try to return
+                 unicode with errors replace, if this failes we return unicode with
+                 string_escape decoding """
+                 try:
+                     u_str = unicode(str)
+                 except UnicodeDecodeError:
+                     try:
+                         u_str = unicode(str, 'utf-8', 'replace')
+                     except UnicodeDecodeError:
+                         #incase we have a decode error just represent as byte string
+                         u_str = unicode(str(str).encode('string_escape'))
+                 return u_str
  No newline at end of file

pylons_app/lib/indexers/daemon.py

0 +2 -5

              #!/usr/bin/env python
              # encoding: utf-8
              # whoosh indexer daemon for hg-app
              # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
              #
              # This program is free software; you can redistribute it and/or
              # modify it under the terms of the GNU General Public License
              # as published by the Free Software Foundation; version 2
              # of the License or (at your option) any later version of the license.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program; if not, write to the Free Software
              # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
              # MA  02110-1301, USA.
              """
              Created on Jan 26, 2010
              @author: marcink
              A daemon will read from the task table and run tasks
              """
              import sys
              import os
              from os.path import dirname as dn
              from os.path import join as jn
              #to get the pylons_app import
              project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
              sys.path.append(project_path)
              from pidlock import LockHeld, DaemonLock
              import traceback
              from pylons_app.config.environment import load_environment
              from pylons_app.model.hg_model import HgModel
+             from pylons_app.lib.helpers import safe_unicode
              from whoosh.index import create_in, open_dir
              from shutil import rmtree
              from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
              SCHEMA, IDX_NAME
              import logging
              import logging.config
              logging.config.fileConfig(jn(project_path, 'development.ini'))
              log = logging.getLogger('whooshIndexer')
              def scan_paths(root_location):
                  """Scan for repositories with HgModel.repo_scan
                  @param root_location: location passed to repo_scan as the second
                      argument
                  @return: result of HgModel.repo_scan, the callers in this module read
                      the repositories from its values()
                  """
                  return HgModel.repo_scan('/', root_location, None, True)
              class WhooshIndexingDaemon(object):
                  """Deamon for atomic jobs"""
                  def __init__(self, indexname='HG_INDEX', repo_location=None):
                      """
                      @param indexname: name of the index that update_index opens
                      @param repo_location: location that is handed to scan_paths
                      """
                      # NOTE(review): build_index creates the index under IDX_NAME and
                      # not under this name - confirm that both are the same
                      self.indexname = indexname
                      self.repo_location = repo_location
                  def get_paths(self, root_dir):
                      """recursive walk in root dir and return a set of all path in that dir
                      excluding files in .hg dir
                      @param root_dir: directory where the walk starts
                      @return: set with the joined path of every file found
                      """
                      index_paths_ = set()
                      for path, dirs, files in os.walk(root_dir):
                          # Removing .hg from dirs in place keeps os.walk out of it. The
                          # former substring test also skipped every directory that only
                          # had '.hg' somewhere in its path
                          if '.hg' in dirs:
                              dirs.remove('.hg')
                          index_paths_.update(jn(path, f) for f in files)
                      return index_paths_
                  def add_doc(self, writer, path, repo):
                      """Adding doc to writer
                      The content is only read for files with an extension from
                      INDEX_EXTENSIONS, all other files are indexed with empty content.
                      @param writer: index writer, add_document is called on it
                      @param path: path of the file to index
                      @param repo: repository of the file, its contact and name are
                          stored with the document
                      """
                      # extension is the lower cased text after the last dot of the
                      # file name
                      ext = unicode(path.split('/')[-1].split('.')[-1].lower())
                      #we just index the content of choosen files
                      if ext in INDEX_EXTENSIONS:
                          log.debug('    >> %s [WITH CONTENT]' % path)
                          # NOTE(review): the file stays open when read() fails
                          fobj = open(path, 'rb')
                          content = fobj.read()
                          fobj.close()
-                         try:
-                             u_content = unicode(content)
-                         except UnicodeDecodeError:
-                             #incase we have a decode error just represent as byte string
-                             u_content = unicode(str(content).encode('string_escape'))
+                         u_content = safe_unicode(content)
                      else:
                          log.debug('    >> %s' % path)
                          #just index file name without it's content
                          u_content = u''
                      try:
                          # stat is only called to get an OSError for a missing path
                          # NOTE(review): open() above runs before this check, a broken
                          # symlink with an indexed extension presumably fails there
                          # and is never skipped here - confirm
                          os.stat(path)
                          writer.add_document(owner=unicode(repo.contact),
                                          repository=u"%s" % repo.name,
                                          path=u"%s" % path,
                                          content=u_content,
                                          modtime=os.path.getmtime(path),
                                          extension=ext)
                      except OSError, e:
                          import errno
                          # a missing path is only logged, every other error is raised
                          if e.errno == errno.ENOENT:
                              log.debug('path %s does not exist or is a broken symlink' % path)
                          else:
                              raise e
                  def build_index(self):
                      """Build the whole index from scratch
                      A previous index directory is removed, then every file of every
                      repository found by scan_paths is added and committed at once.
                      """
                      if os.path.exists(IDX_LOCATION):
                          log.debug('removing previos index')
                          rmtree(IDX_LOCATION)
                      if not os.path.exists(IDX_LOCATION):
                          os.mkdir(IDX_LOCATION)
                      index = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
                      index_writer = index.writer()
                      repositories = scan_paths(self.repo_location).values()
                      for repository in repositories:
                          log.debug('building index @ %s' % repository.path)
                          for file_path in self.get_paths(repository.path):
                              self.add_doc(index_writer, file_path, repository)
                      index_writer.commit(merge=True)
                      log.debug('>>> FINISHED BUILDING INDEX <<<')
                  def update_index(self):
                      """Incremental update of the existing index
                      Documents of deleted files are removed, documents of files with
                      a newer modification time are replaced and files that are not in
                      the index yet are added.
                      """
                      log.debug('STARTING INCREMENTAL INDEXING UPDATE')
                      index = open_dir(IDX_LOCATION, indexname=self.indexname)
                      # all paths stored in the index
                      known_paths = set()
                      # paths of files changed since they were indexed
                      stale_paths = set()
                      index_reader = index.reader()
                      index_writer = index.writer()
                      # first pass: compare the stored documents with the filesystem
                      for stored in index_reader.all_stored_fields():
                          stored_path = stored['path']
                          known_paths.add(stored_path)
                          if not os.path.exists(stored_path):
                              # the file is gone, drop its document
                              log.debug('removing from index %s' % stored_path)
                              index_writer.delete_by_term('path', stored_path)
                              continue
                          indexed_at = stored['modtime']
                          if os.path.getmtime(stored_path) > indexed_at:
                              # the file is newer than its document, drop the document
                              # and remember the path for the second pass
                              log.debug('adding to reindex list %s' % stored_path)
                              index_writer.delete_by_term('path', stored_path)
                              stale_paths.add(stored_path)
                      # second pass: add the changed files and the ones never indexed
                      for repository in scan_paths(self.repo_location).values():
                          for file_path in self.get_paths(repository.path):
                              if file_path in stale_paths or file_path not in known_paths:
                                  self.add_doc(index_writer, file_path, repository)
                                  log.debug('reindexing %s' % file_path)
                      index_writer.commit(merge=True)
                      log.debug('>>> FINISHED <<<')
                  def run(self, full_index=False):
                      """Run daemon"""
                      if full_index:
                          self.build_index()
                      else:
                          self.update_index()
              if __name__ == "__main__":
                  repo_location = '/home/marcink/hg_repos/*'
                  full_index = True # False means looking just for changes
                  try:
                      l = DaemonLock()
                      try:
                          WhooshIndexingDaemon(repo_location=repo_location)\
                              .run(full_index=full_index)
                      finally:
                          # the former code kept the lock when indexing raised
                          l.release()
                  except LockHeld:
                      # presumably another indexer holds the lock
                      sys.exit(1)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages