moved LANGUAGE_EXTENSION_MAP to lib, and made whoosh indexer use the same map
marcink
r1302:f0e90465 beta
@@ -36,7 +36,7 @@ news
 - #109 Repository can be created from external Mercurial link (aka. remote
   repository, and manually updated (via pull) from admin panel
 - beta git support - push/pull server + basic view for git repos
-- added followers page
+- added followers page and forks page
 
 fixes
 -----
@@ -54,6 +54,8 @@ fixes
 - #150 fixes for errors on repositories mapped in db but corrupted in
   filesystem
 - fixed problem with ascendant characters in realm #181
+- fixed problem with sqlite file based database connection pool
+- whoosh indexer and code stats share the same dynamic extensions map
 
 1.1.8 (**2011-04-12**)
 ======================
@@ -24,6 +24,48 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 
+
+def __get_lem():
+    from pygments import lexers
+    from string import lower
+    from collections import defaultdict
+
+    d = defaultdict(lambda: [])
+
+    def __clean(s):
+        s = s.lstrip('*')
+        s = s.lstrip('.')
+
+        if s.find('[') != -1:
+            exts = []
+            start, stop = s.find('['), s.find(']')
+
+            for suffix in s[start + 1:stop]:
+                exts.append(s[:s.find('[')] + suffix)
+            return map(lower, exts)
+        else:
+            return map(lower, [s])
+
+    for lx, t in sorted(lexers.LEXERS.items()):
+        m = map(__clean, t[-2])
+        if m:
+            m = reduce(lambda x, y: x + y, m)
+            for ext in m:
+                desc = lx.replace('Lexer', '')
+                d[ext].append(desc)
+
+    return dict(d)
+
+# language map is also used by whoosh indexer, which for those specified
+# extensions will index it's content
+LANGUAGES_EXTENSIONS_MAP = __get_lem()
+
+#Additional mappings that are not present in the pygments lexers
+# NOTE: that this will overide any mappings in LANGUAGES_EXTENSIONS_MAP
+ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
+
+LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
+
 def str2bool(_str):
     """
     returs True/False value from given string, it tries to translate the
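For orientation, a minimal sketch of how the new module-level map can be inspected once this changeset is applied; it assumes RhodeCode and Pygments are importable, the exact lexer names depend on the installed Pygments version, and the loop is purely illustrative (Python 2, matching the codebase):

    # Illustrative only: inspect the map built by __get_lem() at import time.
    from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP

    # Each extension maps to a list of lexer names derived from
    # pygments.lexers.LEXERS, e.g. LANGUAGES_EXTENSIONS_MAP['py'] -> ['Python'].
    # Note the ADDITIONAL_MAPPINGS override stores a plain string ('XAML')
    # for the 'xaml' key rather than a list.
    for ext in sorted(LANGUAGES_EXTENSIONS_MAP)[:10]:
        print ext, LANGUAGES_EXTENSIONS_MAP[ext]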
@@ -31,12 +31,12 @@ import logging
 
 from time import mktime
 from operator import itemgetter
-from pygments import lexers
 from string import lower
 
 from pylons import config
 from pylons.i18n.translation import _
 
+from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
 from rhodecode.lib.celerylib import run_task, locked_task, str2bool, \
     __get_lockkey, LockHeld, DaemonLock
 from rhodecode.lib.helpers import person
@@ -63,41 +63,6 @@ except ImportError:
 
 CELERY_ON = str2bool(config['app_conf'].get('use_celery'))
 
-LANGUAGES_EXTENSIONS_MAP = {}
-
-
-def __clean(s):
-
-    s = s.lstrip('*')
-    s = s.lstrip('.')
-
-    if s.find('[') != -1:
-        exts = []
-        start, stop = s.find('['), s.find(']')
-
-        for suffix in s[start + 1:stop]:
-            exts.append(s[:s.find('[')] + suffix)
-        return map(lower, exts)
-    else:
-        return map(lower, [s])
-
-for lx, t in sorted(lexers.LEXERS.items()):
-    m = map(__clean, t[-2])
-    if m:
-        m = reduce(lambda x, y: x + y, m)
-        for ext in m:
-            desc = lx.replace('Lexer', '')
-            if ext in LANGUAGES_EXTENSIONS_MAP:
-                if desc not in LANGUAGES_EXTENSIONS_MAP[ext]:
-                    LANGUAGES_EXTENSIONS_MAP[ext].append(desc)
-            else:
-                LANGUAGES_EXTENSIONS_MAP[ext] = [desc]
-
-#Additional mappings that are not present in the pygments lexers
-# NOTE: that this will overide any mappings in LANGUAGES_EXTENSIONS_MAP
-ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
-
-LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
 
 
 def get_session():
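With the duplicated map-building code removed from the task module, the code-stats side only needs the shared import above. A rough sketch of how per-language statistics might be derived from the map; count_files_per_language and its paths argument are hypothetical helpers for illustration, not the actual task code:

    from collections import defaultdict
    from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP

    def count_files_per_language(paths):
        """Group file paths by the language their extension maps to."""
        stats = defaultdict(int)
        for path in paths:
            ext = path.rsplit('.', 1)[-1].lower() if '.' in path else ''
            langs = LANGUAGES_EXTENSIONS_MAP.get(ext)
            if not langs:
                continue  # extension unknown to any lexer, skip it
            # values are lists of lexer names, except ADDITIONAL_MAPPINGS
            # entries which are plain strings; normalize to a single label
            label = langs[0] if isinstance(langs, list) else langs
            stats[label] += 1
        return dict(stats)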
@@ -31,17 +31,7 @@ from os.path import dirname as dn, join
 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
 
 from string import strip
-
-from rhodecode.model import init_model
-from rhodecode.model.scm import ScmModel
-from rhodecode.config.environment import load_environment
-from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
-
 from shutil import rmtree
-from webhelpers.html.builder import escape
-from vcs.utils.lazy import LazyProperty
-
-from sqlalchemy import engine_from_config
 
 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
@@ -49,15 +39,18 @@ from whoosh.index import create_in, open
 from whoosh.formats import Characters
 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
 
+from webhelpers.html.builder import escape
+from sqlalchemy import engine_from_config
+from vcs.utils.lazy import LazyProperty
+
+from rhodecode.model import init_model
+from rhodecode.model.scm import ScmModel
+from rhodecode.config.environment import load_environment
+from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
+from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
 
 #EXTENSIONS WE WANT TO INDEX CONTENT OFF
-INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
-                    'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
-                    'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
-                    'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
-                    'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
-                    'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
-                    'yaws']
+INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
 
 #CUSTOM ANALYZER wordsplit + lowercase filter
 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
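Since INDEX_EXTENSIONS is now simply the key set of the shared map, deciding whether a file's content should be indexed reduces to a membership test. A small sketch under that assumption; get_extension and should_index_content are illustrative helpers, not part of the indexer itself:

    from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP

    INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()

    def get_extension(path):
        """Return the lower-cased extension of path, or '' if it has none."""
        return path.rsplit('.', 1)[-1].lower() if '.' in path else ''

    def should_index_content(path):
        # index content only when some Pygments lexer claims the extension
        return get_extension(path) in INDEX_EXTENSIONS

    # e.g. should_index_content('setup.py') -> True
    #      should_index_content('logo.png') -> False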
@@ -203,8 +196,9 @@ class ResultWrapper(object):
         Smart function that implements chunking the content
         but not overlap chunks so it doesn't highlight the same
         close occurrences twice.
-        @param matcher:
-        @param size:
+
+        :param matcher:
+        :param size:
         """
         memory = [(0, 0)]
         for span in self.matcher.spans():