##// END OF EJS Templates
Updated contributors and fixed index line length
marcink -
r1409:c3172bc0 beta
parent child Browse files
Show More
@@ -1,7 +1,9 b''
List of contributors to RhodeCode project:
    Marcin Kuźmiński <marcin@python-works.com>
    Lukasz Balcerzak <lukaszbalcerzak@gmail.com>
    Jason Harris <jason@jasonfharris.com>
    Thayne Harbaugh <thayne@fusionio.com>
    cejones
    Lorenzo M. Catucci <lorenzo@sancho.ccd.uniroma2.it>
    Dmitri Kuznetsov
    Jared Bunting <jared.bunting@peachjean.com>
@@ -1,225 +1,226 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 rhodecode.lib.indexers.__init__
3 rhodecode.lib.indexers.__init__
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5
5
6 Whoosh indexing module for RhodeCode
6 Whoosh indexing module for RhodeCode
7
7
8 :created_on: Aug 17, 2010
8 :created_on: Aug 17, 2010
9 :author: marcink
9 :author: marcink
10 :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
10 :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
11 :license: GPLv3, see COPYING for more details.
11 :license: GPLv3, see COPYING for more details.
12 """
12 """
13 # This program is free software: you can redistribute it and/or modify
13 # This program is free software: you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation, either version 3 of the License, or
15 # the Free Software Foundation, either version 3 of the License, or
16 # (at your option) any later version.
16 # (at your option) any later version.
17 #
17 #
18 # This program is distributed in the hope that it will be useful,
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 # GNU General Public License for more details.
21 # GNU General Public License for more details.
22 #
22 #
23 # You should have received a copy of the GNU General Public License
23 # You should have received a copy of the GNU General Public License
24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 import os
25 import os
26 import sys
26 import sys
27 import traceback
27 import traceback
28 from os.path import dirname as dn, join as jn
28 from os.path import dirname as dn, join as jn
29
29
30 #to get the rhodecode import
30 #to get the rhodecode import
31 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
31 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
32
32
33 from string import strip
33 from string import strip
34 from shutil import rmtree
34 from shutil import rmtree
35
35
36 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
36 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
37 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
37 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
38 from whoosh.index import create_in, open_dir
38 from whoosh.index import create_in, open_dir
39 from whoosh.formats import Characters
39 from whoosh.formats import Characters
40 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
40 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
41
41
42 from webhelpers.html.builder import escape
42 from webhelpers.html.builder import escape
43 from sqlalchemy import engine_from_config
43 from sqlalchemy import engine_from_config
44 from vcs.utils.lazy import LazyProperty
44 from vcs.utils.lazy import LazyProperty
45
45
46 from rhodecode.model import init_model
46 from rhodecode.model import init_model
47 from rhodecode.model.scm import ScmModel
47 from rhodecode.model.scm import ScmModel
48 from rhodecode.model.repo import RepoModel
48 from rhodecode.model.repo import RepoModel
49 from rhodecode.config.environment import load_environment
49 from rhodecode.config.environment import load_environment
50 from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
50 from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
51 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
51 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
52
52
# Extensions whose file content gets indexed: the keys of the
# language -> extensions map (everything RhodeCode can syntax-highlight).
INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()

# Custom analyzer: split on word characters, then lowercase each token.
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()


# Whoosh index schema definition.
# `content` stores character positions (Characters format) so that match
# spans can be located for excerpt highlighting; `repository`, `path` and
# `extension` are stored for display in search results.
SCHEMA = Schema(owner=TEXT(),
                repository=TEXT(stored=True),
                path=TEXT(stored=True),
                content=FieldType(format=Characters(ANALYZER),
                                  scorable=True, stored=True),
                modtime=STORED(), extension=TEXT(stored=True))


# Name of the on-disk Whoosh index directory.
IDX_NAME = 'HG_INDEX'
# Wrap highlighted terms in <span>, separate fragments with a styled break.
FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
# Excerpt fragments are up to 200 characters wide.
FRAGMENTER = SimpleFragmenter(200)
72
72
73
73
class MakeIndex(BasePasterCommand):
    """Paster command that builds the Whoosh full-text-search index
    for the repositories served by a RhodeCode instance.

    Invoked as ``paster make-index CONFIG_FILE``; options allow limiting
    the repository set and forcing a full (from scratch) rebuild.
    """

    max_args = 1
    min_args = 1

    usage = "CONFIG_FILE"
    summary = "Creates index for full text search given configuration file"
    group_name = "RhodeCode"
    takes_config_file = -1
    parser = Command.standard_parser(verbose=True)

    def command(self):
        """Bootstrap the application from the given ini file and run the
        Whoosh indexing daemon under an exclusive on-disk lock.

        Exits with status 1 when another indexing run already holds
        the lock.
        """
        # Deferred import: pylons `config` is only populated after the
        # paster machinery has loaded the configuration file.
        from pylons import config
        add_cache(config)
        engine = engine_from_config(config, 'sqlalchemy.db1.')
        init_model(engine)

        index_location = config['index_dir']
        repo_location = self.options.repo_location \
            if self.options.repo_location else RepoModel().repos_path
        repo_list = map(strip, self.options.repo_list.split(',')) \
            if self.options.repo_list else None

        #======================================================================
        # WHOOSH DAEMON
        #======================================================================
        from rhodecode.lib.pidlock import LockHeld, DaemonLock
        from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon

        try:
            # lock file lives next to (two levels above) the index dir
            lock = DaemonLock(file=jn(dn(dn(index_location)),
                                      'make_index.lock'))
        except LockHeld:
            # another make-index run is already in progress
            sys.exit(1)

        try:
            WhooshIndexingDaemon(index_location=index_location,
                                 repo_location=repo_location,
                                 repo_list=repo_list)\
                .run(full_index=self.options.full_index)
        finally:
            # BUGFIX: the lock previously leaked (stayed on disk) when the
            # daemon raised, blocking every later indexing run; always
            # release it.
            lock.release()

    def update_parser(self):
        """Register this command's command-line options on the parser."""
        self.parser.add_option('--repo-location',
                          action='store',
                          dest='repo_location',
                          help="Specifies repositories location to index OPTIONAL",
                          )
        self.parser.add_option('--index-only',
                          action='store',
                          dest='repo_list',
                          help="Specifies a comma separated list of repositores "
                                "to build index on OPTIONAL",
                          )
        self.parser.add_option('-f',
                          action='store_true',
                          dest='full_index',
                          help="Specifies that index should be made full i.e"
                                " destroy old and build from scratch",
                          default=False)
130
131
class ResultWrapper(object):
    """Lazy wrapper around Whoosh search results.

    Consumes the matcher once (on first access to ``doc_ids``), then
    resolves matched documents on demand via iteration or slicing,
    decorating each stored-fields dict with a highlighted short excerpt.
    """

    def __init__(self, search_type, searcher, matcher, highlight_items):
        # search_type: excerpt highlighting only happens for 'content'
        self.search_type = search_type
        self.searcher = searcher            # whoosh Searcher (stored fields)
        self.matcher = matcher              # whoosh Matcher over the hits
        self.highlight_items = highlight_items
        # half the SimpleFragmenter window (200): context taken on each
        # side of a match span; `//` keeps the Py2 integer-division result
        self.fragment_size = 200 // 2

    @LazyProperty
    def doc_ids(self):
        """Return ``[[docnum, [(start, end), ...]], ...]``.

        Drains ``self.matcher``; cached by LazyProperty so the matcher is
        only consumed once.
        """
        docs_id = []
        while self.matcher.is_active():
            docnum = self.matcher.id()
            # materialize the excerpt windows for this document
            chunks = list(self.get_chunks())
            docs_id.append([docnum, chunks])
            self.matcher.next()
        return docs_id

    def __str__(self):
        return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))

    def __repr__(self):
        return self.__str__()

    def __len__(self):
        return len(self.doc_ids)

    def __iter__(self):
        """Lazily yield the full content dict of each matched document."""
        for docid in self.doc_ids:
            yield self.get_full_content(docid)

    def __getitem__(self, key):
        """Slice access used for result pagination.

        NOTE(review): ``key`` is assumed to be a ``slice`` object (only
        ``.start``/``.stop`` are read); integer indexing is not supported.
        """
        i, j = key.start, key.stop
        # renamed local (was `slice`) to stop shadowing the builtin
        return [self.get_full_content(docid) for docid in self.doc_ids[i:j]]

    def get_full_content(self, docid):
        """Resolve ``[docnum, chunks]`` into the stored-fields dict,
        adding the repo-relative path, a short excerpt and its
        highlighted HTML form."""
        res = self.searcher.stored_fields(docid[0])
        # strip everything up to and including the repository name
        f_path = res['path'][res['path'].find(res['repository'])
                             + len(res['repository']):].lstrip('/')

        content_short = self.get_short_content(res, docid[1])
        res.update({'content_short': content_short,
                    'content_short_hl': self.highlight(content_short),
                    'f_path': f_path})

        return res

    def get_short_content(self, res, chunks):
        """Concatenate the ``(start, end)`` slices of the stored content."""
        return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])

    def get_chunks(self):
        """Yield non-overlapping ``(start, end)`` excerpt windows.

        Each match span is padded by ``fragment_size`` on both sides;
        a window starting inside the previous one is clipped to its end
        so close occurrences are not highlighted twice.
        """
        memory = [(0, 0)]
        for span in self.matcher.spans():
            start = span.startchar or 0
            end = span.endchar or 0
            start_offseted = max(0, start - self.fragment_size)
            end_offseted = end + self.fragment_size

            if start_offseted < memory[-1][1]:
                # overlap with the previous chunk: clip to its end
                start_offseted = memory[-1][1]
            memory.append((start_offseted, end_offseted,))
            yield (start_offseted, end_offseted,)

    def highlight(self, content, top=5):
        """Return HTML-highlighted ``content`` (content searches only)."""
        if self.search_type != 'content':
            return ''
        hl = highlight(escape(content),
                       self.highlight_items,
                       analyzer=ANALYZER,
                       fragmenter=FRAGMENTER,
                       formatter=FORMATTER,
                       top=top)
        return hl
General Comments 0
You need to be logged in to leave comments. Login now