##// END OF EJS Templates
added safe unicode function, and implemented it in whoosh indexer
added safe unicode function, and implemented it in whoosh indexer

File last commit:

r436:28f19fa5 default
r443:e5157e2a default
Show More
__init__.py
40 lines | 1.6 KiB | text/x-python | PythonLexer
import sys
import os
from pidlock import LockHeld, DaemonLock
import traceback
from os.path import dirname as dn
from os.path import join as jn
#to get the pylons_app import
sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
from pylons_app.config.environment import load_environment
from pylons_app.model.hg_model import HgModel
from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
from whoosh.fields import TEXT, ID, STORED, Schema
from whoosh.index import create_in, open_dir
from shutil import rmtree
# Directory where the index is kept: "data/index" under the fourth ancestor
# directory of this file (the file's own containing directory counts as the
# first ancestor).
IDX_LOCATION = jn(
    dn(dn(dn(dn(os.path.abspath(__file__))))),
    'data',
    'index',
)
# File extensions (without the leading dot) whose content we want to index.
# Kept as an alphabetically ordered list, grouped here by first letter.
INDEX_EXTENSIONS = [
    'action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd',
    'c', 'cfm', 'cpp', 'cs', 'css',
    'diff', 'do',
    'el', 'erl',
    'h', 'htm', 'html',
    'ini',
    'java', 'js', 'jsp', 'jspx',
    'lisp', 'lua',
    'm', 'mako', 'ml',
    'pas', 'patch', 'php', 'php3', 'php4', 'phtml', 'pm', 'py',
    'rb', 'rst',
    's', 'sh', 'sql',
    'tpl', 'txt',
    'vim',
    'wss',
    'xhtml', 'xml', 'xsl', 'xslt',
    'yaws',
]
# Custom analyzer: split text into \w+ word tokens, then pipe the tokens
# through a lowercase filter.
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()

# Index schema definition, one field per line. Only the "content" field uses
# the custom ANALYZER; "modtime" is declared as a STORED field.
SCHEMA = Schema(
    owner=TEXT(),
    repository=TEXT(stored=True),
    path=ID(stored=True, unique=True),
    content=TEXT(stored=True, analyzer=ANALYZER),
    modtime=STORED(),
    extension=TEXT(stored=True),
)
# Name used for the index.
# NOTE(review): presumably passed to whoosh when creating/opening the index
# under IDX_LOCATION -- confirm against the callers.
IDX_NAME = 'HG_INDEX'