##// END OF EJS Templates
bumped whoosh to 2.3.X series...
marcink -
r1995:b6c902d8 beta
parent child Browse files
Show More
@@ -1,18 +1,18 b''
1 1 Pylons==1.0.0
2 2 Beaker==1.6.2
3 3 WebHelpers>=1.2
4 4 formencode==1.2.4
5 5 SQLAlchemy==0.7.4
6 6 Mako==0.5.0
7 7 pygments>=1.4
8 whoosh<1.8
8 whoosh<2.4
9 9 celery>=2.2.5,<2.3
10 10 babel
11 11 python-dateutil>=1.5.0,<2.0.0
12 12 dulwich>=0.8.0,<0.9.0
13 13 vcs>=0.2.3.dev
14 14 webob==1.0.8
15 15 markdown==2.0.3
16 16 docutils==0.8.1
17 17 py-bcrypt
18 18 mercurial>=2.1,<2.2 No newline at end of file
@@ -1,93 +1,93 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.__init__
4 4 ~~~~~~~~~~~~~~~~~~
5 5
6 6 RhodeCode, a web-based repository management system based on Pylons
7 7 versioning implementation: http://semver.org/
8 8
9 9 :created_on: Apr 9, 2010
10 10 :author: marcink
11 11 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
12 12 :license: GPLv3, see COPYING for more details.
13 13 """
14 14 # This program is free software: you can redistribute it and/or modify
15 15 # it under the terms of the GNU General Public License as published by
16 16 # the Free Software Foundation, either version 3 of the License, or
17 17 # (at your option) any later version.
18 18 #
19 19 # This program is distributed in the hope that it will be useful,
20 20 # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 22 # GNU General Public License for more details.
23 23 #
24 24 # You should have received a copy of the GNU General Public License
25 25 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 26 import sys
27 27 import platform
28 28
29 29 VERSION = (1, 3, 0, 'beta')
30 30 __version__ = '.'.join((str(each) for each in VERSION[:4]))
31 31 __dbversion__ = 4 # defines current db version for migrations
32 32 __platform__ = platform.system()
33 33 __license__ = 'GPLv3'
34 34 __py_version__ = sys.version_info
35 35
36 36 PLATFORM_WIN = ('Windows')
37 37 PLATFORM_OTHERS = ('Linux', 'Darwin', 'FreeBSD', 'OpenBSD', 'SunOS')
38 38
39 39 requirements = [
40 40 "Pylons==1.0.0",
41 41 "Beaker==1.6.2",
42 42 "WebHelpers>=1.2",
43 43 "formencode==1.2.4",
44 44 "SQLAlchemy==0.7.4",
45 45 "Mako==0.5.0",
46 46 "pygments>=1.4",
47 "whoosh<1.8",
47 "whoosh<2.4",
48 48 "celery>=2.2.5,<2.3",
49 49 "babel",
50 50 "python-dateutil>=1.5.0,<2.0.0",
51 51 "dulwich>=0.8.0,<0.9.0",
52 52 "vcs>=0.2.3.dev",
53 53 "webob==1.0.8",
54 54 "markdown==2.0.3",
55 55 "docutils==0.8.1",
56 56 ]
57 57
58 58 if __py_version__ < (2, 6):
59 59 requirements.append("simplejson")
60 60 requirements.append("pysqlite")
61 61
62 62 if __platform__ in PLATFORM_WIN:
63 63 requirements.append("mercurial>=2.1,<2.2")
64 64 else:
65 65 requirements.append("py-bcrypt")
66 66 requirements.append("mercurial>=2.1,<2.2")
67 67
68 68
69 69 try:
70 70 from rhodecode.lib import get_current_revision
71 71 _rev = get_current_revision()
72 72 except ImportError:
73 73 # this is needed when doing some setup.py operations
74 74 _rev = False
75 75
76 76 if len(VERSION) > 3 and _rev:
77 77 __version__ += ' [rev:%s]' % _rev[0]
78 78
79 79
80 80 def get_version():
81 81 """Returns shorter version (digit parts only) as string."""
82 82
83 83 return '.'.join((str(each) for each in VERSION[:3]))
84 84
85 85 BACKENDS = {
86 86 'hg': 'Mercurial repository',
87 87 'git': 'Git repository',
88 88 }
89 89
90 90 CELERY_ON = False
91 91
92 92 # link to config for pylons
93 93 CONFIG = None
@@ -1,121 +1,126 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.controllers.search
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 Search controller for rhodecode
7 7
8 8 :created_on: Aug 7, 2010
9 9 :author: marcink
10 10 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
11 11 :license: GPLv3, see COPYING for more details.
12 12 """
13 13 # This program is free software: you can redistribute it and/or modify
14 14 # it under the terms of the GNU General Public License as published by
15 15 # the Free Software Foundation, either version 3 of the License, or
16 16 # (at your option) any later version.
17 17 #
18 18 # This program is distributed in the hope that it will be useful,
19 19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 21 # GNU General Public License for more details.
22 22 #
23 23 # You should have received a copy of the GNU General Public License
24 24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 25 import logging
26 26 import traceback
27 27
28 28 from pylons.i18n.translation import _
29 from pylons import request, config, session, tmpl_context as c
29 from pylons import request, config, tmpl_context as c
30 30
31 31 from rhodecode.lib.auth import LoginRequired
32 32 from rhodecode.lib.base import BaseController, render
33 33 from rhodecode.lib.indexers import SCHEMA, IDX_NAME, ResultWrapper
34 34
35 35 from webhelpers.paginate import Page
36 36 from webhelpers.util import update_params
37 37
38 38 from whoosh.index import open_dir, EmptyIndexError
39 39 from whoosh.qparser import QueryParser, QueryParserError
40 40 from whoosh.query import Phrase, Wildcard, Term, Prefix
41 41
42 42 log = logging.getLogger(__name__)
43 43
44 44
45 45 class SearchController(BaseController):
46 46
47 47 @LoginRequired()
48 48 def __before__(self):
49 49 super(SearchController, self).__before__()
50 50
51 51 def index(self, search_repo=None):
52 52 c.repo_name = search_repo
53 53 c.formated_results = []
54 54 c.runtime = ''
55 55 c.cur_query = request.GET.get('q', None)
56 56 c.cur_type = request.GET.get('type', 'source')
57 57 c.cur_search = search_type = {'content': 'content',
58 58 'commit': 'content',
59 59 'path': 'path',
60 60 'repository': 'repository'}\
61 61 .get(c.cur_type, 'content')
62 62
63 63 if c.cur_query:
64 64 cur_query = c.cur_query.lower()
65 65
66 66 if c.cur_query:
67 67 p = int(request.params.get('page', 1))
68 68 highlight_items = set()
69 69 try:
70 70 idx = open_dir(config['app_conf']['index_dir'],
71 71 indexname=IDX_NAME)
72 72 searcher = idx.searcher()
73 73
74 74 qp = QueryParser(search_type, schema=SCHEMA)
75 75 if c.repo_name:
76 76 cur_query = u'repository:%s %s' % (c.repo_name, cur_query)
77 77 try:
78 78 query = qp.parse(unicode(cur_query))
79
79 # extract words for highlight
80 80 if isinstance(query, Phrase):
81 81 highlight_items.update(query.words)
82 82 elif isinstance(query, Prefix):
83 83 highlight_items.add(query.text)
84 84 else:
85 85 for i in query.all_terms():
86 86 if i[0] == 'content':
87 87 highlight_items.add(i[1])
88 88
89 89 matcher = query.matcher(searcher)
90 90
91 91 log.debug(query)
92 92 log.debug(highlight_items)
93 93 results = searcher.search(query)
94 94 res_ln = len(results)
95 c.runtime = '%s results (%.3f seconds)' \
96 % (res_ln, results.runtime)
95 c.runtime = '%s results (%.3f seconds)' % (
96 res_ln, results.runtime
97 )
97 98
98 99 def url_generator(**kw):
99 100 return update_params("?q=%s&type=%s" \
100 101 % (c.cur_query, c.cur_search), **kw)
101 102
102 103 c.formated_results = Page(
103 104 ResultWrapper(search_type, searcher, matcher,
104 105 highlight_items),
105 page=p, item_count=res_ln,
106 items_per_page=10, url=url_generator)
106 page=p,
107 item_count=res_ln,
108 items_per_page=10,
109 url=url_generator
110 )
107 111
108 112 except QueryParserError:
109 113 c.runtime = _('Invalid search query. Try quoting it.')
110 114 searcher.close()
111 115 except (EmptyIndexError, IOError):
112 116 log.error(traceback.format_exc())
113 117 log.error('Empty Index data')
114 118 c.runtime = _('There is no index to search in. '
115 119 'Please run whoosh indexer')
116 120 except (Exception):
117 121 log.error(traceback.format_exc())
118 122 c.runtime = _('An error occurred during this search operation')
119 123
124
120 125 # Return a rendered template
121 126 return render('/search/search.html')
@@ -1,226 +1,229 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.indexers.__init__
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 Whoosh indexing module for RhodeCode
7 7
8 8 :created_on: Aug 17, 2010
9 9 :author: marcink
10 10 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
11 11 :license: GPLv3, see COPYING for more details.
12 12 """
13 13 # This program is free software: you can redistribute it and/or modify
14 14 # it under the terms of the GNU General Public License as published by
15 15 # the Free Software Foundation, either version 3 of the License, or
16 16 # (at your option) any later version.
17 17 #
18 18 # This program is distributed in the hope that it will be useful,
19 19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 21 # GNU General Public License for more details.
22 22 #
23 23 # You should have received a copy of the GNU General Public License
24 24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 25 import os
26 26 import sys
27 27 import traceback
28 28 from os.path import dirname as dn, join as jn
29 29
30 30 #to get the rhodecode import
31 31 sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
32 32
33 33 from string import strip
34 34 from shutil import rmtree
35 35
36 36 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
37 37 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
38 38 from whoosh.index import create_in, open_dir
39 39 from whoosh.formats import Characters
40 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
40 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
41 41
42 42 from webhelpers.html.builder import escape
43 43 from sqlalchemy import engine_from_config
44 from vcs.utils.lazy import LazyProperty
45 44
46 45 from rhodecode.model import init_model
47 46 from rhodecode.model.scm import ScmModel
48 47 from rhodecode.model.repo import RepoModel
49 48 from rhodecode.config.environment import load_environment
50 from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
49 from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP, LazyProperty
51 50 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
52 51
53 52 #EXTENSIONS WE WANT TO INDEX CONTENT OF
54 53 INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
55 54
56 55 #CUSTOM ANALYZER wordsplit + lowercase filter
57 56 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
58 57
59 58
60 59 #INDEX SCHEMA DEFINITION
61 SCHEMA = Schema(owner=TEXT(),
60 SCHEMA = Schema(
61 owner=TEXT(),
62 62 repository=TEXT(stored=True),
63 63 path=TEXT(stored=True),
64 content=FieldType(format=Characters(ANALYZER),
64 content=FieldType(format=Characters(), analyzer=ANALYZER,
65 65 scorable=True, stored=True),
66 modtime=STORED(), extension=TEXT(stored=True))
67
66 modtime=STORED(),
67 extension=TEXT(stored=True)
68 )
68 69
69 70 IDX_NAME = 'HG_INDEX'
70 71 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
71 FRAGMENTER = SimpleFragmenter(200)
72 FRAGMENTER = ContextFragmenter(200)
72 73
73 74
74 75 class MakeIndex(BasePasterCommand):
75 76
76 77 max_args = 1
77 78 min_args = 1
78 79
79 80 usage = "CONFIG_FILE"
80 81 summary = "Creates index for full text search given configuration file"
81 82 group_name = "RhodeCode"
82 83 takes_config_file = -1
83 84 parser = Command.standard_parser(verbose=True)
84 85
85 86 def command(self):
86 87
87 88 from pylons import config
88 89 add_cache(config)
89 90 engine = engine_from_config(config, 'sqlalchemy.db1.')
90 91 init_model(engine)
91 92
92 93 index_location = config['index_dir']
93 94 repo_location = self.options.repo_location \
94 95 if self.options.repo_location else RepoModel().repos_path
95 96 repo_list = map(strip, self.options.repo_list.split(',')) \
96 97 if self.options.repo_list else None
97 98
98 99 #======================================================================
99 100 # WHOOSH DAEMON
100 101 #======================================================================
101 102 from rhodecode.lib.pidlock import LockHeld, DaemonLock
102 103 from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
103 104 try:
104 105 l = DaemonLock(file_=jn(dn(dn(index_location)), 'make_index.lock'))
105 106 WhooshIndexingDaemon(index_location=index_location,
106 107 repo_location=repo_location,
107 108 repo_list=repo_list)\
108 109 .run(full_index=self.options.full_index)
109 110 l.release()
110 111 except LockHeld:
111 112 sys.exit(1)
112 113
113 114 def update_parser(self):
114 115 self.parser.add_option('--repo-location',
115 116 action='store',
116 117 dest='repo_location',
117 118 help="Specifies repositories location to index OPTIONAL",
118 119 )
119 120 self.parser.add_option('--index-only',
120 121 action='store',
121 122 dest='repo_list',
122 123 help="Specifies a comma separated list of repositores "
123 124 "to build index on OPTIONAL",
124 125 )
125 126 self.parser.add_option('-f',
126 127 action='store_true',
127 128 dest='full_index',
128 129 help="Specifies that index should be made full i.e"
129 130 " destroy old and build from scratch",
130 131 default=False)
131 132
132 133
133 134 class ResultWrapper(object):
134 135 def __init__(self, search_type, searcher, matcher, highlight_items):
135 136 self.search_type = search_type
136 137 self.searcher = searcher
137 138 self.matcher = matcher
138 139 self.highlight_items = highlight_items
139 self.fragment_size = 200 / 2
140 self.fragment_size = 200
140 141
141 142 @LazyProperty
142 143 def doc_ids(self):
143 144 docs_id = []
144 145 while self.matcher.is_active():
145 146 docnum = self.matcher.id()
146 147 chunks = [offsets for offsets in self.get_chunks()]
147 148 docs_id.append([docnum, chunks])
148 149 self.matcher.next()
149 150 return docs_id
150 151
151 152 def __str__(self):
152 153 return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))
153 154
154 155 def __repr__(self):
155 156 return self.__str__()
156 157
157 158 def __len__(self):
158 159 return len(self.doc_ids)
159 160
160 161 def __iter__(self):
161 162 """
162 163 Allows iteration over results, lazily generating content
163 164
164 165 *Requires* implementation of ``__getitem__`` method.
165 166 """
166 167 for docid in self.doc_ids:
167 168 yield self.get_full_content(docid)
168 169
169 170 def __getitem__(self, key):
170 171 """
171 172 Slicing of resultWrapper
172 173 """
173 174 i, j = key.start, key.stop
174 175
175 slice = []
176 slices = []
176 177 for docid in self.doc_ids[i:j]:
177 slice.append(self.get_full_content(docid))
178 return slice
178 slices.append(self.get_full_content(docid))
179 return slices
179 180
180 181 def get_full_content(self, docid):
181 182 res = self.searcher.stored_fields(docid[0])
182 183 f_path = res['path'][res['path'].find(res['repository']) \
183 184 + len(res['repository']):].lstrip('/')
184 185
185 186 content_short = self.get_short_content(res, docid[1])
186 187 res.update({'content_short':content_short,
187 188 'content_short_hl':self.highlight(content_short),
188 189 'f_path':f_path})
189 190
190 191 return res
191 192
192 193 def get_short_content(self, res, chunks):
193 194
194 195 return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
195 196
196 197 def get_chunks(self):
197 198 """
198 199 Smart function that implements chunking of the content
199 200 without overlapping chunks, so it doesn't highlight the same
200 201 close occurrences twice.
201 202
202 203 :param matcher:
203 204 :param size:
204 205 """
205 206 memory = [(0, 0)]
206 207 for span in self.matcher.spans():
207 208 start = span.startchar or 0
208 209 end = span.endchar or 0
209 210 start_offseted = max(0, start - self.fragment_size)
210 211 end_offseted = end + self.fragment_size
211 212
212 213 if start_offseted < memory[-1][1]:
213 214 start_offseted = memory[-1][1]
214 215 memory.append((start_offseted, end_offseted,))
215 216 yield (start_offseted, end_offseted,)
216 217
217 218 def highlight(self, content, top=5):
218 219 if self.search_type != 'content':
219 220 return ''
220 hl = highlight(escape(content),
221 self.highlight_items,
221 hl = highlight(
222 text=escape(content),
223 terms=self.highlight_items,
222 224 analyzer=ANALYZER,
223 225 fragmenter=FRAGMENTER,
224 226 formatter=FORMATTER,
225 top=top)
227 top=top
228 )
226 229 return hl
@@ -1,238 +1,235 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.indexers.daemon
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 A daemon will read from task table and run tasks
7 7
8 8 :created_on: Jan 26, 2010
9 9 :author: marcink
10 10 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
11 11 :license: GPLv3, see COPYING for more details.
12 12 """
13 13 # This program is free software: you can redistribute it and/or modify
14 14 # it under the terms of the GNU General Public License as published by
15 15 # the Free Software Foundation, either version 3 of the License, or
16 16 # (at your option) any later version.
17 17 #
18 18 # This program is distributed in the hope that it will be useful,
19 19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 21 # GNU General Public License for more details.
22 22 #
23 23 # You should have received a copy of the GNU General Public License
24 24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 25
26 26 import os
27 27 import sys
28 28 import logging
29 29 import traceback
30 30
31 31 from shutil import rmtree
32 32 from time import mktime
33 33
34 34 from os.path import dirname as dn
35 35 from os.path import join as jn
36 36
37 37 #to get the rhodecode import
38 38 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
39 39 sys.path.append(project_path)
40 40
41 41
42 42 from rhodecode.model.scm import ScmModel
43 43 from rhodecode.lib import safe_unicode
44 44 from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME
45 45
46 46 from vcs.exceptions import ChangesetError, RepositoryError, \
47 47 NodeDoesNotExistError
48 48
49 49 from whoosh.index import create_in, open_dir
50 50
51 51
52
53 52 log = logging.getLogger('whooshIndexer')
54 53 # create logger
55 54 log.setLevel(logging.DEBUG)
56 55 log.propagate = False
57 56 # create console handler and set level to debug
58 57 ch = logging.StreamHandler()
59 58 ch.setLevel(logging.DEBUG)
60 59
61 60 # create formatter
62 61 formatter = logging.Formatter("%(asctime)s - %(name)s -"
63 62 " %(levelname)s - %(message)s")
64 63
65 64 # add formatter to ch
66 65 ch.setFormatter(formatter)
67 66
68 67 # add ch to logger
69 68 log.addHandler(ch)
70 69
70
71 71 class WhooshIndexingDaemon(object):
72 72 """
73 73 Daemon for atomic jobs
74 74 """
75 75
76 def __init__(self, indexname='HG_INDEX', index_location=None,
76 def __init__(self, indexname=IDX_NAME, index_location=None,
77 77 repo_location=None, sa=None, repo_list=None):
78 78 self.indexname = indexname
79 79
80 80 self.index_location = index_location
81 81 if not index_location:
82 82 raise Exception('You have to provide index location')
83 83
84 84 self.repo_location = repo_location
85 85 if not repo_location:
86 86 raise Exception('You have to provide repositories location')
87 87
88 88 self.repo_paths = ScmModel(sa).repo_scan(self.repo_location)
89 89
90 90 if repo_list:
91 91 filtered_repo_paths = {}
92 92 for repo_name, repo in self.repo_paths.items():
93 93 if repo_name in repo_list:
94 94 filtered_repo_paths[repo_name] = repo
95 95
96 96 self.repo_paths = filtered_repo_paths
97 97
98
99 98 self.initial = False
100 99 if not os.path.isdir(self.index_location):
101 100 os.makedirs(self.index_location)
102 101 log.info('Cannot run incremental index since it does not'
103 102 ' yet exist running full build')
104 103 self.initial = True
105 104
106 105 def get_paths(self, repo):
106 105 """Recursively walk the root dir and return a set of all paths in that dir,
107 106 based on the repository walk function
109 108 """
110 109 index_paths_ = set()
111 110 try:
112 111 tip = repo.get_changeset('tip')
113 112 for topnode, dirs, files in tip.walk('/'):
114 113 for f in files:
115 114 index_paths_.add(jn(repo.path, f.path))
116 115
117 116 except RepositoryError, e:
118 117 log.debug(traceback.format_exc())
119 118 pass
120 119 return index_paths_
121 120
122 121 def get_node(self, repo, path):
123 122 n_path = path[len(repo.path) + 1:]
124 123 node = repo.get_changeset().get_node(n_path)
125 124 return node
126 125
127 126 def get_node_mtime(self, node):
128 127 return mktime(node.last_changeset.date.timetuple())
129 128
130 129 def add_doc(self, writer, path, repo, repo_name):
130 129 """Add a doc to the writer; this function itself fetches data from
131 130 the instance of the vcs backend"""
133 132 node = self.get_node(repo, path)
134 133
135 134 #we just index the content of chosen files, and skip binary files
136 135 if node.extension in INDEX_EXTENSIONS and not node.is_binary:
137 136
138 137 u_content = node.content
139 138 if not isinstance(u_content, unicode):
140 139 log.warning(' >> %s Could not get this content as unicode '
141 140 'replacing with empty content', path)
142 141 u_content = u''
143 142 else:
144 143 log.debug(' >> %s [WITH CONTENT]' % path)
145 144
146 145 else:
147 146 log.debug(' >> %s' % path)
147 146 #just index file name without its content
149 148 u_content = u''
150 149
151 150 writer.add_document(owner=unicode(repo.contact),
152 151 repository=safe_unicode(repo_name),
153 152 path=safe_unicode(path),
154 153 content=u_content,
155 154 modtime=self.get_node_mtime(node),
156 155 extension=node.extension)
157 156
158
159 157 def build_index(self):
160 158 if os.path.exists(self.index_location):
161 159 log.debug('removing previous index')
162 160 rmtree(self.index_location)
163 161
164 162 if not os.path.exists(self.index_location):
165 163 os.mkdir(self.index_location)
166 164
167 165 idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
168 166 writer = idx.writer()
169 167
170 168 for repo_name, repo in self.repo_paths.items():
171 169 log.debug('building index @ %s' % repo.path)
172 170
173 171 for idx_path in self.get_paths(repo):
174 172 self.add_doc(writer, idx_path, repo, repo_name)
175 173
176 174 log.debug('>> COMMITING CHANGES <<')
177 175 writer.commit(merge=True)
178 176 log.debug('>>> FINISHED BUILDING INDEX <<<')
179 177
180
181 178 def update_index(self):
182 179 log.debug('STARTING INCREMENTAL INDEXING UPDATE')
183 180
184 181 idx = open_dir(self.index_location, indexname=self.indexname)
185 182 # The set of all paths in the index
186 183 indexed_paths = set()
187 184 # The set of all paths we need to re-index
188 185 to_index = set()
189 186
190 187 reader = idx.reader()
191 188 writer = idx.writer()
192 189
193 190 # Loop over the stored fields in the index
194 191 for fields in reader.all_stored_fields():
195 192 indexed_path = fields['path']
196 193 indexed_paths.add(indexed_path)
197 194
198 195 repo = self.repo_paths[fields['repository']]
199 196
200 197 try:
201 198 node = self.get_node(repo, indexed_path)
202 199 except (ChangesetError, NodeDoesNotExistError):
203 200 # This file was deleted since it was indexed
204 201 log.debug('removing from index %s' % indexed_path)
205 202 writer.delete_by_term('path', indexed_path)
206 203
207 204 else:
208 205 # Check if this file was changed since it was indexed
209 206 indexed_time = fields['modtime']
210 207 mtime = self.get_node_mtime(node)
211 208 if mtime > indexed_time:
212 209 # The file has changed, delete it and add it to the list of
213 210 # files to reindex
214 211 log.debug('adding to reindex list %s' % indexed_path)
215 212 writer.delete_by_term('path', indexed_path)
216 213 to_index.add(indexed_path)
217 214
218 215 # Loop over the files in the filesystem
219 216 # Assume we have a function that gathers the filenames of the
220 217 # documents to be indexed
221 218 for repo_name, repo in self.repo_paths.items():
222 219 for path in self.get_paths(repo):
223 220 if path in to_index or path not in indexed_paths:
224 221 # This is either a file that's changed, or a new file
225 222 # that wasn't indexed before. So index it!
226 223 self.add_doc(writer, path, repo, repo_name)
227 224 log.debug('re indexing %s' % path)
228 225
229 226 log.debug('>> COMMITING CHANGES <<')
230 227 writer.commit(merge=True)
231 228 log.debug('>>> FINISHED REBUILDING INDEX <<<')
232 229
233 230 def run(self, full_index=False):
234 231 """Run daemon"""
235 232 if full_index or self.initial:
236 233 self.build_index()
237 234 else:
238 235 self.update_index()
@@ -1,159 +1,169 b''
1 1 div.codeblock {
2 2 overflow: auto;
3 3 padding: 0px;
4 4 border: 1px solid #ccc;
5 5 background: #f8f8f8;
6 6 font-size: 100%;
7 7 line-height: 100%;
8 8 /* new */
9 9 line-height: 125%;
10 10 -webkit-border-radius: 4px;
11 11 -moz-border-radius: 4px;
12 12 border-radius: 4px;
13 13 }
14 14 div.codeblock .code-header{
15 15 border-bottom: 1px solid #CCCCCC;
16 16 background: #EEEEEE;
17 17 padding:10px 0 10px 0;
18 18 }
19 19
20 20 div.codeblock .code-header .stats{
21 21 clear: both;
22 22 margin-top:-3px;
23 23 padding-left: 8px;
24 24 border-bottom: 1px solid rgb(204, 204, 204);
25 25 margin-bottom: 5px; height: 23px;
26 26 }
27 27
28 28 div.codeblock .code-header .stats .left{
29 29 float:left;
30 30 }
31 31 div.codeblock .code-header .stats .left.item{
32 32 float:left;
33 33 padding: 0 9px 0 9px;
34 34 border-right:1px solid #ccc;
35 35 }
36 36 div.codeblock .code-header .stats .left.item.last{
37 37 border-right:none;
38 38 }
39 39 div.codeblock .code-header .stats .buttons{
40 40 float:right;
41 41 padding-right:4px;
42 42 }
43 43
44 44 div.codeblock .code-header .author{
45 45 margin-left:25px;
46 46 font-weight: bold;
47 47 height: 25px;
48 48 }
49 49 div.codeblock .code-header .author .user{
50 50 padding-top:3px;
51 51 }
52 52 div.codeblock .code-header .commit{
53 53 margin-left:25px;
54 54 font-weight: normal;
55 55 white-space:pre;
56 56 }
57 57
58 58 div.codeblock .code-body table{
59 59 width: 0 !important;
60 60 border: 0px !important;
61 61 }
62 62 div.codeblock .code-body table td {
63 63 border: 0px !important;
64 64 }
65 65 div.code-body {
66 66 background-color: #FFFFFF;
67 67 }
68 div.code-body pre .match{
68
69 div.codeblock .code-header .search-path {
70 padding: 0px 0px 0px 10px;
71 }
72
73 div.search-code-body {
74 background-color: #FFFFFF;
75 padding: 5px 0px 5px 10px;
76 }
77
78 div.search-code-body pre .match{
69 79 background-color: #FAFFA6;
70 80 }
71 div.code-body pre .break{
81 div.search-code-body pre .break{
72 82 background-color: #DDE7EF;
73 83 width: 100%;
74 84 color: #747474;
75 85 display: block;
76 86
77 87 }
78 88 div.annotatediv{
79 89 margin-left:2px;
80 90 margin-right:4px;
81 91 }
82 92 .code-highlight {
83 93 padding: 0px;
84 94 margin-top: 5px;
85 95 margin-bottom: 5px;
86 96 border-left: 2px solid #ccc;
87 97 }
88 98 .code-highlight pre, .linenodiv pre {
89 99 padding: 5px;
90 100 margin: 0;
91 101 }
92 102 .code-highlight pre div:target {
93 103 background-color: #FFFFBE !important;
94 104 }
95 105
96 106 .linenos a { text-decoration: none; }
97 107
98 108 .code { display: block; }
99 109 .code-highlight .hll { background-color: #ffffcc }
100 110 .code-highlight .c { color: #408080; font-style: italic } /* Comment */
101 111 .code-highlight .err { border: 1px solid #FF0000 } /* Error */
102 112 .code-highlight .k { color: #008000; font-weight: bold } /* Keyword */
103 113 .code-highlight .o { color: #666666 } /* Operator */
104 114 .code-highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */
105 115 .code-highlight .cp { color: #BC7A00 } /* Comment.Preproc */
106 116 .code-highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */
107 117 .code-highlight .cs { color: #408080; font-style: italic } /* Comment.Special */
108 118 .code-highlight .gd { color: #A00000 } /* Generic.Deleted */
109 119 .code-highlight .ge { font-style: italic } /* Generic.Emph */
110 120 .code-highlight .gr { color: #FF0000 } /* Generic.Error */
111 121 .code-highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
112 122 .code-highlight .gi { color: #00A000 } /* Generic.Inserted */
113 123 .code-highlight .go { color: #808080 } /* Generic.Output */
114 124 .code-highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
115 125 .code-highlight .gs { font-weight: bold } /* Generic.Strong */
116 126 .code-highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
117 127 .code-highlight .gt { color: #0040D0 } /* Generic.Traceback */
118 128 .code-highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
119 129 .code-highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
120 130 .code-highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
121 131 .code-highlight .kp { color: #008000 } /* Keyword.Pseudo */
122 132 .code-highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
123 133 .code-highlight .kt { color: #B00040 } /* Keyword.Type */
124 134 .code-highlight .m { color: #666666 } /* Literal.Number */
125 135 .code-highlight .s { color: #BA2121 } /* Literal.String */
126 136 .code-highlight .na { color: #7D9029 } /* Name.Attribute */
127 137 .code-highlight .nb { color: #008000 } /* Name.Builtin */
128 138 .code-highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */
129 139 .code-highlight .no { color: #880000 } /* Name.Constant */
130 140 .code-highlight .nd { color: #AA22FF } /* Name.Decorator */
131 141 .code-highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */
132 142 .code-highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
133 143 .code-highlight .nf { color: #0000FF } /* Name.Function */
134 144 .code-highlight .nl { color: #A0A000 } /* Name.Label */
135 145 .code-highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
136 146 .code-highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */
137 147 .code-highlight .nv { color: #19177C } /* Name.Variable */
138 148 .code-highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
139 149 .code-highlight .w { color: #bbbbbb } /* Text.Whitespace */
140 150 .code-highlight .mf { color: #666666 } /* Literal.Number.Float */
141 151 .code-highlight .mh { color: #666666 } /* Literal.Number.Hex */
142 152 .code-highlight .mi { color: #666666 } /* Literal.Number.Integer */
143 153 .code-highlight .mo { color: #666666 } /* Literal.Number.Oct */
144 154 .code-highlight .sb { color: #BA2121 } /* Literal.String.Backtick */
145 155 .code-highlight .sc { color: #BA2121 } /* Literal.String.Char */
146 156 .code-highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
147 157 .code-highlight .s2 { color: #BA2121 } /* Literal.String.Double */
148 158 .code-highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
149 159 .code-highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */
150 160 .code-highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
151 161 .code-highlight .sx { color: #008000 } /* Literal.String.Other */
152 162 .code-highlight .sr { color: #BB6688 } /* Literal.String.Regex */
153 163 .code-highlight .s1 { color: #BA2121 } /* Literal.String.Single */
154 164 .code-highlight .ss { color: #19177C } /* Literal.String.Symbol */
155 165 .code-highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */
156 166 .code-highlight .vc { color: #19177C } /* Name.Variable.Class */
157 167 .code-highlight .vg { color: #19177C } /* Name.Variable.Global */
158 168 .code-highlight .vi { color: #19177C } /* Name.Variable.Instance */
159 169 .code-highlight .il { color: #666666 } /* Literal.Number.Integer.Long */
@@ -1,31 +1,32 b''
1 1 ##content highlighting
2 2
3 3 %for cnt,sr in enumerate(c.formated_results):
4 4 %if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(sr['repository'],'search results check'):
5 5 <div class="table">
6 6 <div id="body${cnt}" class="codeblock">
7 7 <div class="code-header">
8 <div class="revision">${h.link_to(h.literal('%s &raquo; %s' % (sr['repository'],sr['f_path'])),
9 h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}</div>
8 <div class="search-path">${h.link_to(h.literal('%s &raquo; %s' % (sr['repository'],sr['f_path'])),
9 h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}
10 10 </div>
11 <div class="code-body">
11 </div>
12 <div class="search-code-body">
12 13 <pre>${h.literal(sr['content_short_hl'])}</pre>
13 14 </div>
14 15 </div>
15 16 </div>
16 17 %else:
17 18 %if cnt == 0:
18 19 <div class="table">
19 20 <div id="body${cnt}" class="codeblock">
20 21 <div class="error">${_('Permission denied')}</div>
21 22 </div>
22 23 </div>
23 24 %endif
24 25
25 26 %endif
26 27 %endfor
27 28 %if c.cur_query and c.formated_results:
28 29 <div class="pagination-wh pagination-left">
29 30 ${c.formated_results.pager('$link_previous ~2~ $link_next')}
30 31 </div>
31 32 %endif
General Comments 0
You need to be logged in to leave comments. Login now