##// END OF EJS Templates
fixed issue with latest webhelpers pagination module
marcink -
r1198:02a7f263 beta
parent child Browse files
Show More
@@ -1,230 +1,232 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 rhodecode.lib.indexers.__init__
3 rhodecode.lib.indexers.__init__
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5
5
6 Whoosh indexing module for RhodeCode
6 Whoosh indexing module for RhodeCode
7
7
8 :created_on: Aug 17, 2010
8 :created_on: Aug 17, 2010
9 :author: marcink
9 :author: marcink
10 :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
10 :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
11 :license: GPLv3, see COPYING for more details.
11 :license: GPLv3, see COPYING for more details.
12 """
12 """
13 # This program is free software; you can redistribute it and/or
13 # This program is free software; you can redistribute it and/or
14 # modify it under the terms of the GNU General Public License
14 # modify it under the terms of the GNU General Public License
15 # as published by the Free Software Foundation; version 2
15 # as published by the Free Software Foundation; version 2
16 # of the License or (at your option) any later version of the license.
16 # of the License or (at your option) any later version of the license.
17 #
17 #
18 # This program is distributed in the hope that it will be useful,
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 # GNU General Public License for more details.
21 # GNU General Public License for more details.
22 #
22 #
23 # You should have received a copy of the GNU General Public License
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
26 # MA 02110-1301, USA.
26 # MA 02110-1301, USA.
27 import os
27 import os
28 import sys
28 import sys
29 import traceback
29 import traceback
30 from os.path import dirname as dn, join as jn
30 from os.path import dirname as dn, join as jn
31
31
32 #to get the rhodecode import
32 #to get the rhodecode import
33 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
33 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
34
34
35 from string import strip
35 from string import strip
36
36
37 from rhodecode.model import init_model
37 from rhodecode.model import init_model
38 from rhodecode.model.scm import ScmModel
38 from rhodecode.model.scm import ScmModel
39 from rhodecode.config.environment import load_environment
39 from rhodecode.config.environment import load_environment
40 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
40 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
41
41
42 from shutil import rmtree
42 from shutil import rmtree
43 from webhelpers.html.builder import escape
43 from webhelpers.html.builder import escape
44 from vcs.utils.lazy import LazyProperty
44 from vcs.utils.lazy import LazyProperty
45
45
46 from sqlalchemy import engine_from_config
46 from sqlalchemy import engine_from_config
47
47
48 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
48 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
49 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
49 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
50 from whoosh.index import create_in, open_dir
50 from whoosh.index import create_in, open_dir
51 from whoosh.formats import Characters
51 from whoosh.formats import Characters
52 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
52 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
53
53
54
54
55 #EXTENSIONS WE WANT TO INDEX CONTENT OF
55 #EXTENSIONS WE WANT TO INDEX CONTENT OF
56 INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
56 INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
57 'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
57 'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
58 'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
58 'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
59 'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
59 'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
60 'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
60 'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
61 'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
61 'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
62 'yaws']
62 'yaws']
63
63
64 #CUSTOM ANALYZER wordsplit + lowercase filter
64 #CUSTOM ANALYZER wordsplit + lowercase filter
65 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
65 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
66
66
67
67
68 #INDEX SCHEMA DEFINITION
68 #INDEX SCHEMA DEFINITION
69 SCHEMA = Schema(owner=TEXT(),
69 SCHEMA = Schema(owner=TEXT(),
70 repository=TEXT(stored=True),
70 repository=TEXT(stored=True),
71 path=TEXT(stored=True),
71 path=TEXT(stored=True),
72 content=FieldType(format=Characters(ANALYZER),
72 content=FieldType(format=Characters(ANALYZER),
73 scorable=True, stored=True),
73 scorable=True, stored=True),
74 modtime=STORED(), extension=TEXT(stored=True))
74 modtime=STORED(), extension=TEXT(stored=True))
75
75
76
76
77 IDX_NAME = 'HG_INDEX'
77 IDX_NAME = 'HG_INDEX'
78 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
78 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
79 FRAGMENTER = SimpleFragmenter(200)
79 FRAGMENTER = SimpleFragmenter(200)
80
80
81
81
82 class MakeIndex(BasePasterCommand):
82 class MakeIndex(BasePasterCommand):
83
83
84 max_args = 1
84 max_args = 1
85 min_args = 1
85 min_args = 1
86
86
87 usage = "CONFIG_FILE"
87 usage = "CONFIG_FILE"
88 summary = "Creates index for full text search given configuration file"
88 summary = "Creates index for full text search given configuration file"
89 group_name = "RhodeCode"
89 group_name = "RhodeCode"
90 takes_config_file = -1
90 takes_config_file = -1
91 parser = Command.standard_parser(verbose=True)
91 parser = Command.standard_parser(verbose=True)
92
92
93 def command(self):
93 def command(self):
94
94
95 from pylons import config
95 from pylons import config
96 add_cache(config)
96 add_cache(config)
97 engine = engine_from_config(config, 'sqlalchemy.db1.')
97 engine = engine_from_config(config, 'sqlalchemy.db1.')
98 init_model(engine)
98 init_model(engine)
99
99
100 index_location = config['index_dir']
100 index_location = config['index_dir']
101 repo_location = self.options.repo_location
101 repo_location = self.options.repo_location
102 repo_list = map(strip, self.options.repo_list.split(',')) \
102 repo_list = map(strip, self.options.repo_list.split(',')) \
103 if self.options.repo_list else None
103 if self.options.repo_list else None
104
104
105 #======================================================================
105 #======================================================================
106 # WHOOSH DAEMON
106 # WHOOSH DAEMON
107 #======================================================================
107 #======================================================================
108 from rhodecode.lib.pidlock import LockHeld, DaemonLock
108 from rhodecode.lib.pidlock import LockHeld, DaemonLock
109 from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
109 from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
110 try:
110 try:
111 l = DaemonLock()
111 l = DaemonLock()
112 WhooshIndexingDaemon(index_location=index_location,
112 WhooshIndexingDaemon(index_location=index_location,
113 repo_location=repo_location,
113 repo_location=repo_location,
114 repo_list=repo_list)\
114 repo_list=repo_list)\
115 .run(full_index=self.options.full_index)
115 .run(full_index=self.options.full_index)
116 l.release()
116 l.release()
117 except LockHeld:
117 except LockHeld:
118 sys.exit(1)
118 sys.exit(1)
119
119
120 def update_parser(self):
120 def update_parser(self):
121 self.parser.add_option('--repo-location',
121 self.parser.add_option('--repo-location',
122 action='store',
122 action='store',
123 dest='repo_location',
123 dest='repo_location',
124 help="Specifies repositories location to index REQUIRED",
124 help="Specifies repositories location to index REQUIRED",
125 )
125 )
126 self.parser.add_option('--index-only',
126 self.parser.add_option('--index-only',
127 action='store',
127 action='store',
128 dest='repo_list',
128 dest='repo_list',
129 help="Specifies a comma separated list of repositores "
129 help="Specifies a comma separated list of repositores "
130 "to build index on OPTIONAL",
130 "to build index on OPTIONAL",
131 )
131 )
132 self.parser.add_option('-f',
132 self.parser.add_option('-f',
133 action='store_true',
133 action='store_true',
134 dest='full_index',
134 dest='full_index',
135 help="Specifies that index should be made full i.e"
135 help="Specifies that index should be made full i.e"
136 " destroy old and build from scratch",
136 " destroy old and build from scratch",
137 default=False)
137 default=False)
138
138
139 class ResultWrapper(object):
139 class ResultWrapper(object):
140 def __init__(self, search_type, searcher, matcher, highlight_items):
140 def __init__(self, search_type, searcher, matcher, highlight_items):
141 self.search_type = search_type
141 self.search_type = search_type
142 self.searcher = searcher
142 self.searcher = searcher
143 self.matcher = matcher
143 self.matcher = matcher
144 self.highlight_items = highlight_items
144 self.highlight_items = highlight_items
145 self.fragment_size = 200 / 2
145 self.fragment_size = 200 / 2
146
146
147 @LazyProperty
147 @LazyProperty
148 def doc_ids(self):
148 def doc_ids(self):
149 docs_id = []
149 docs_id = []
150 while self.matcher.is_active():
150 while self.matcher.is_active():
151 docnum = self.matcher.id()
151 docnum = self.matcher.id()
152 chunks = [offsets for offsets in self.get_chunks()]
152 chunks = [offsets for offsets in self.get_chunks()]
153 docs_id.append([docnum, chunks])
153 docs_id.append([docnum, chunks])
154 self.matcher.next()
154 self.matcher.next()
155 return docs_id
155 return docs_id
156
156
157 def __str__(self):
157 def __str__(self):
158 return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))
158 return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))
159
159
160 def __repr__(self):
160 def __repr__(self):
161 return self.__str__()
161 return self.__str__()
162
162
163 def __len__(self):
163 def __len__(self):
164 return len(self.doc_ids)
164 return len(self.doc_ids)
165
165
166 def __iter__(self):
166 def __iter__(self):
167 """
167 """
168 Allows iteration over results, lazily generating content
168 Allows iteration over results, lazily generating content
169
169
170 *Requires* implementation of ``__getitem__`` method.
170 *Requires* implementation of ``__getitem__`` method.
171 """
171 """
172 for docid in self.doc_ids:
172 for docid in self.doc_ids:
173 yield self.get_full_content(docid)
173 yield self.get_full_content(docid)
174
174
175 def __getslice__(self, i, j):
175 def __getitem__(self, key):
176 """
176 """
177 Slicing of ResultWrapper
177 Slicing of ResultWrapper
178 """
178 """
179 i, j = key.start, key.stop
180
179 slice = []
181 slice = []
180 for docid in self.doc_ids[i:j]:
182 for docid in self.doc_ids[i:j]:
181 slice.append(self.get_full_content(docid))
183 slice.append(self.get_full_content(docid))
182 return slice
184 return slice
183
185
184
186
185 def get_full_content(self, docid):
187 def get_full_content(self, docid):
186 res = self.searcher.stored_fields(docid[0])
188 res = self.searcher.stored_fields(docid[0])
187 f_path = res['path'][res['path'].find(res['repository']) \
189 f_path = res['path'][res['path'].find(res['repository']) \
188 + len(res['repository']):].lstrip('/')
190 + len(res['repository']):].lstrip('/')
189
191
190 content_short = self.get_short_content(res, docid[1])
192 content_short = self.get_short_content(res, docid[1])
191 res.update({'content_short':content_short,
193 res.update({'content_short':content_short,
192 'content_short_hl':self.highlight(content_short),
194 'content_short_hl':self.highlight(content_short),
193 'f_path':f_path})
195 'f_path':f_path})
194
196
195 return res
197 return res
196
198
197 def get_short_content(self, res, chunks):
199 def get_short_content(self, res, chunks):
198
200
199 return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
201 return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
200
202
201 def get_chunks(self):
203 def get_chunks(self):
202 """
204 """
203 Smart function that chunks the content
205 Smart function that chunks the content
204 without overlapping chunks, so it doesn't highlight the same
206 without overlapping chunks, so it doesn't highlight the same
205 close occurrences twice.
207 close occurrences twice.
206 @param matcher:
208 @param matcher:
207 @param size:
209 @param size:
208 """
210 """
209 memory = [(0, 0)]
211 memory = [(0, 0)]
210 for span in self.matcher.spans():
212 for span in self.matcher.spans():
211 start = span.startchar or 0
213 start = span.startchar or 0
212 end = span.endchar or 0
214 end = span.endchar or 0
213 start_offseted = max(0, start - self.fragment_size)
215 start_offseted = max(0, start - self.fragment_size)
214 end_offseted = end + self.fragment_size
216 end_offseted = end + self.fragment_size
215
217
216 if start_offseted < memory[-1][1]:
218 if start_offseted < memory[-1][1]:
217 start_offseted = memory[-1][1]
219 start_offseted = memory[-1][1]
218 memory.append((start_offseted, end_offseted,))
220 memory.append((start_offseted, end_offseted,))
219 yield (start_offseted, end_offseted,)
221 yield (start_offseted, end_offseted,)
220
222
221 def highlight(self, content, top=5):
223 def highlight(self, content, top=5):
222 if self.search_type != 'content':
224 if self.search_type != 'content':
223 return ''
225 return ''
224 hl = highlight(escape(content),
226 hl = highlight(escape(content),
225 self.highlight_items,
227 self.highlight_items,
226 analyzer=ANALYZER,
228 analyzer=ANALYZER,
227 fragmenter=FRAGMENTER,
229 fragmenter=FRAGMENTER,
228 formatter=FORMATTER,
230 formatter=FORMATTER,
229 top=top)
231 top=top)
230 return hl
232 return hl
General Comments 0
You need to be logged in to leave comments. Login now