##// END OF EJS Templates
rewrote whoosh indexing to run internal repository.walk() instead of filesystem....
marcink -
r560:3072935b default
parent child Browse files
Show More
@@ -1,269 +1,270 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 # database management for hg app
3 # database management for hg app
4 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
4 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
5 #
5 #
6 # This program is free software; you can redistribute it and/or
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; version 2
8 # as published by the Free Software Foundation; version 2
9 # of the License or (at your option) any later version of the license.
9 # of the License or (at your option) any later version of the license.
10 #
10 #
11 # This program is distributed in the hope that it will be useful,
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
14 # GNU General Public License for more details.
15 #
15 #
16 # You should have received a copy of the GNU General Public License
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 # MA 02110-1301, USA.
19 # MA 02110-1301, USA.
20
20
21 """
21 """
22 Created on April 10, 2010
22 Created on April 10, 2010
23 database management and creation for hg app
23 database management and creation for hg app
24 @author: marcink
24 @author: marcink
25 """
25 """
26
26
27 from os.path import dirname as dn, join as jn
27 from os.path import dirname as dn, join as jn
28 import os
28 import os
29 import sys
29 import sys
30 import uuid
30 import uuid
31
31
32 from rhodecode.lib.auth import get_crypt_password
32 from rhodecode.lib.auth import get_crypt_password
33 from rhodecode.lib.utils import ask_ok
33 from rhodecode.lib.utils import ask_ok
34 from rhodecode.model import init_model
34 from rhodecode.model import init_model
35 from rhodecode.model.db import User, Permission, RhodeCodeUi, RhodeCodeSettings, \
35 from rhodecode.model.db import User, Permission, RhodeCodeUi, RhodeCodeSettings, \
36 UserToPerm
36 UserToPerm
37 from rhodecode.model import meta
37 from rhodecode.model import meta
38 from sqlalchemy.engine import create_engine
38 from sqlalchemy.engine import create_engine
39 import logging
39 import logging
40
40
41 log = logging.getLogger(__name__)
41 log = logging.getLogger(__name__)
42
42
43 class DbManage(object):
43 class DbManage(object):
def __init__(self, log_sql, dbname, root, tests=False):
    """
    Bind the model layer to the sqlite database file ``dbname`` in ``root``.

    :param log_sql: forwarded to ``create_engine`` as its ``echo`` flag
    :param dbname: file name of the sqlite database
    :param root: directory the database file name is joined onto
    :param tests: stored on the instance; the prompt methods read it to
        decide whether to ask the user or use canned values
    """
    self.root = root
    self.dbname = dbname
    self.tests = tests
    # whether the file is on disk is only established by check_for_db()
    self.db_exists = False

    db_file = jn(self.root, self.dbname)
    init_model(create_engine('sqlite:////%s' % db_file, echo=log_sql))
    self.sa = meta.Session
53
53
def check_for_db(self, override):
    """
    Record on ``self.db_exists`` whether the database file is present.

    :param override: when false, an existing database file is an error
    :raises Exception: the file exists and ``override`` is false
    """
    location = jn(self.root, self.dbname)
    log.info('checking for existing db in %s', location)

    if not os.path.isfile(location):
        return

    self.db_exists = True
    log.info('database exist')
    if override:
        return
    raise Exception('database already exists')
62
62
def create_tables(self, override=False):
    """
    Create the database tables, optionally removing an existing sqlite
    file first.

    :param override: when true the old database file (if any) is deleted,
        after confirmation unless running in tests mode, and the tables
        are created without the ``checkfirst`` guard
    """
    self.check_for_db(override)

    if override:
        log.info("database exist and it's going to be destroyed")
        # test runs never prompt; interactive runs must confirm
        if self.tests:
            confirmed = True
        else:
            confirmed = ask_ok('Are you sure to destroy old database ? [y/n]')
        if not confirmed:
            sys.exit()
        if self.db_exists and confirmed:
            os.remove(jn(self.root, self.dbname))

    meta.Base.metadata.create_all(checkfirst=not override)
    log.info('Created tables for %s', self.dbname)
81
81
def admin_prompt(self):
    """
    Create the initial accounts.

    In tests mode three fixed accounts (one admin, two regular) are
    created; otherwise the admin credentials are read from the terminal
    and the process exits when the two password entries differ.
    """
    if self.tests:
        log.info('creating admin and regular test users')
        fixtures = (
            ('test_admin', 'test12', 'test_admin@mail.com', True),
            ('test_regular', 'test12', 'test_regular@mail.com', False),
            ('test_regular2', 'test12', 'test_regular2@mail.com', False),
        )
        for login, secret, mail, is_admin in fixtures:
            self.create_user(login, secret, mail, is_admin)
        return

    import getpass
    username = raw_input('Specify admin username:')
    password = getpass.getpass('Specify admin password:')
    confirm = getpass.getpass('Confirm password:')
    if password != confirm:
        log.error('passwords mismatch')
        sys.exit()
    email = raw_input('Specify admin email:')
    self.create_user(username, password, email, True)
98
98
99
99
100
100
def config_prompt(self, test_repo_path=''):
    """
    Store the default ui and application settings rows.

    :param test_repo_path: repositories location to use instead of asking;
        the user is only prompted when this is empty and the instance is
        not in tests mode

    The process exits when the resulting path is not a directory.
    """
    log.info('Setting up repositories config')

    if self.tests or test_repo_path:
        path = test_repo_path
    else:
        path = raw_input('Specify valid full path to your repositories'
                         ' you can change this later in application settings:')

    if not os.path.isdir(path):
        log.error('You entered wrong path: %s', path)
        sys.exit()

    # (section, key, value) for every ui row, in insertion order
    ui_spec = (
        ('hooks', 'changegroup.update', 'hg update >&2'),
        ('hooks', 'changegroup.repo_size',
         'python:rhodecode.lib.hooks.repo_size'),
        ('web', 'push_ssl', 'false'),
        ('web', 'allow_archive', 'gz zip bz2'),
        ('web', 'allow_push', '*'),
        ('web', 'baseurl', '/'),
        ('paths', '/', os.path.join(path, '*')),
    )
    ui_rows = []
    for section, key, value in ui_spec:
        row = RhodeCodeUi()
        row.ui_section = section
        row.ui_key = key
        row.ui_value = value
        ui_rows.append(row)
    # only the 'hg update' hook gets an explicit ui_active value
    ui_rows[0].ui_active = False

    settings_rows = []
    for name, value in (('realm', 'RhodeCode authentication'),
                        ('title', 'RhodeCode')):
        setting = RhodeCodeSettings()
        setting.app_settings_name = name
        setting.app_settings_value = value
        settings_rows.append(setting)

    try:
        for record in ui_rows + settings_rows:
            self.sa.add(record)
        self.sa.commit()
    except:
        self.sa.rollback()
        raise
    log.info('created ui config')
174
175
def create_user(self, username, password, email='', admin=False):
    """
    Insert an active user account and commit it.

    :param username: login name
    :param password: plain-text password, stored via ``get_crypt_password``
    :param email: e-mail address of the account
    :param admin: value of the account's ``admin`` flag

    The first/last name are always stored as 'RhodeCode' / 'Admin'.
    Any failure rolls the session back and is re-raised.
    """
    # this method also creates non-admin accounts, so the log line must
    # not unconditionally claim an administrator is being created
    if admin:
        log.info('creating administrator user %s', username)
    else:
        log.info('creating regular user %s', username)

    new_user = User()
    new_user.username = username
    new_user.password = get_crypt_password(password)
    new_user.name = 'RhodeCode'
    new_user.lastname = 'Admin'
    new_user.email = email
    new_user.admin = admin
    new_user.active = True

    try:
        self.sa.add(new_user)
        self.sa.commit()
    except:
        # bare except on purpose: roll back on anything, then re-raise
        self.sa.rollback()
        raise
192
193
def create_default_user(self):
    """
    Insert the inactive, non-admin ``default`` account that the default
    permissions are attached to, and commit it.
    """
    log.info('creating default user')

    def_user = User()
    attributes = (
        ('username', 'default'),
        # throw-away password: first 8 characters of a fresh uuid1
        ('password', get_crypt_password(str(uuid.uuid1())[:8])),
        ('name', 'default'),
        ('lastname', 'default'),
        ('email', 'default@default.com'),
        ('admin', False),
        ('active', False),
    )
    for attr, value in attributes:
        setattr(def_user, attr, value)

    try:
        self.sa.add(def_user)
        self.sa.commit()
    except:
        self.sa.rollback()
        raise
210
211
def create_permissions(self):
    """
    Insert every known permission, committing after each one.

    Permission names follow ``module.(access|create|change|delete)_[name]``
    or ``module.(read|write|owner)``. A failing insert rolls the session
    back and is re-raised.
    """
    perms = [('repository.none', 'Repository no access'),
             ('repository.read', 'Repository read access'),
             ('repository.write', 'Repository write access'),
             ('repository.admin', 'Repository admin access'),
             ('hg.admin', 'Hg Administrator'),
             ('hg.create.repository', 'Repository create'),
             ('hg.create.none', 'Repository creation disabled'),
             ('hg.register.none', 'Register disabled'),
             # the descriptions used to read "without ... activation",
             # which contradicted the permission keys they describe
             ('hg.register.manual_activate',
              'Register new user with rhodecode with manual activation'),
             ('hg.register.auto_activate',
              'Register new user with rhodecode with auto activation'),
             ]

    for name, longname in perms:
        new_perm = Permission()
        new_perm.permission_name = name
        new_perm.permission_longname = longname
        try:
            self.sa.add(new_perm)
            self.sa.commit()
        except:
            # bare except on purpose: roll back on anything, then re-raise
            self.sa.rollback()
            raise
236
237
def populate_default_permissions(self):
    """
    Attach the three default permissions to the ``default`` user and
    commit them together: manual-activation registration, repository
    creation and repository read access.
    """
    log.info('creating default user permissions')

    default_user = self.sa.query(User)\
        .filter(User.username == 'default').scalar()

    grants = []
    for perm_name in ('hg.register.manual_activate',
                      'hg.create.repository',
                      'repository.read'):
        grant = UserToPerm()
        grant.user = default_user
        grant.permission = self.sa.query(Permission)\
            .filter(Permission.permission_name == perm_name)\
            .scalar()
        grants.append(grant)

    try:
        for grant in grants:
            self.sa.add(grant)
        self.sa.commit()
    except:
        self.sa.rollback()
        raise
269
270
@@ -1,238 +1,237 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 # whoosh indexer daemon for rhodecode
3 # whoosh indexer daemon for rhodecode
4 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
4 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
5 #
5 #
6 # This program is free software; you can redistribute it and/or
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; version 2
8 # as published by the Free Software Foundation; version 2
9 # of the License or (at your option) any later version of the license.
9 # of the License or (at your option) any later version of the license.
10 #
10 #
11 # This program is distributed in the hope that it will be useful,
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
14 # GNU General Public License for more details.
15 #
15 #
16 # You should have received a copy of the GNU General Public License
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 # MA 02110-1301, USA.
19 # MA 02110-1301, USA.
20 """
20 """
21 Created on Jan 26, 2010
21 Created on Jan 26, 2010
22
22
23 @author: marcink
23 @author: marcink
24 A daemon will read from task table and run tasks
24 A daemon will read from task table and run tasks
25 """
25 """
26 import sys
26 import sys
27 import os
27 import os
28 from os.path import dirname as dn
28 from os.path import dirname as dn
29 from os.path import join as jn
29 from os.path import join as jn
30
30
31 #to get the rhodecode import
31 #to get the rhodecode import
32 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
32 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
33 sys.path.append(project_path)
33 sys.path.append(project_path)
34
34
35 from rhodecode.lib.pidlock import LockHeld, DaemonLock
35 from rhodecode.lib.pidlock import LockHeld, DaemonLock
36 from rhodecode.model.hg_model import HgModel
36 from rhodecode.model.hg_model import HgModel
37 from rhodecode.lib.helpers import safe_unicode
37 from rhodecode.lib.helpers import safe_unicode
38 from whoosh.index import create_in, open_dir
38 from whoosh.index import create_in, open_dir
39 from shutil import rmtree
39 from shutil import rmtree
40 from rhodecode.lib.indexers import INDEX_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME
40 from rhodecode.lib.indexers import INDEX_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME
41
41
42 from time import mktime
43 from vcs.backends import hg
44
42 import logging
45 import logging
43
46
# console handler emitting everything from DEBUG up, with a timestamped format
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)

# dedicated indexer logger; it does not propagate to ancestor loggers, so
# records reach only the handler attached here
log = logging.getLogger('whooshIndexer')
log.setLevel(logging.DEBUG)
log.propagate = False
log.addHandler(ch)
60
63
def scan_paths(root_location):
    """
    Return the result of ``HgModel.repo_scan`` for ``root_location``.

    Callers in this module iterate ``.values()`` of the result and use
    each item as a repository object.
    """
    found = HgModel.repo_scan('/', root_location, None, True)
    return found
63
66
64 class WhooshIndexingDaemon(object):
67 class WhooshIndexingDaemon(object):
65 """Deamon for atomic jobs"""
68 """
69 Deamon for atomic jobs
70 """
66
71
def __init__(self, indexname='HG_INDEX', repo_location=None):
    """
    :param indexname: name of the index opened for incremental updates
    :param repo_location: repositories location handed to ``scan_paths``

    When the index directory is missing it is created and
    ``self.initial`` is set so that ``run`` performs a full build.
    """
    self.indexname = indexname
    self.repo_location = repo_location
    self.initial = False

    if os.path.isdir(IDX_LOCATION):
        return

    os.mkdir(IDX_LOCATION)
    log.info('Cannot run incremental index since it does not'
             ' yet exist running full build')
    self.initial = True
76
81
def get_paths(self, root_dir):
    """
    Collect the paths of all files reported by the repository walk.

    :param root_dir: filesystem path of the repository
    :returns: set of ``root_dir`` joined with each file node's ``path``,
        taken from ``repo.walk('/', 'tip')``
    """
    repo = hg.MercurialRepository(root_dir)
    index_paths_ = set()
    for topnode, dirs, files in repo.walk('/', 'tip'):
        # Only ``files`` contributes paths. The previous extra loop over
        # ``dirs`` re-added these very same files once per sub-directory
        # (and shadowed the builtin ``dir``) without changing the set.
        for f in files:
            index_paths_.add(jn(root_dir, f.path))

    return index_paths_
87
97
98
def add_doc(self, writer, path, repo):
    """
    Add one document for ``path`` to the index ``writer``.

    :param writer: index writer receiving the document
    :param path: full path of the file (repository path + file path)
    :param repo: repository object the file belongs to

    The file's content is indexed only when its extension is listed in
    INDEX_EXTENSIONS; other files are indexed by name with empty content.
    """
    # strip the repository prefix (and the separator) to get the node path
    relative = path[len(repo.path) + 1:]
    node = repo.get_changeset().get_node(relative)

    with_content = node.extension in INDEX_EXTENSIONS
    if with_content:
        log.debug(' >> %s [WITH CONTENT]' % path)
        u_content = node.content
    else:
        log.debug(' >> %s' % path)
        u_content = u''

    document = dict(
        owner=unicode(repo.contact),
        repository=safe_unicode(repo.name),
        path=safe_unicode(path),
        content=u_content,
        # modification time is the date of the node's last changeset
        modtime=mktime(node.last_changeset.date.timetuple()),
        extension=node.extension,
    )
    writer.add_document(**document)
120
119
121
120
def build_index(self):
    """
    Rebuild the whole index from scratch: wipe the index directory,
    create a fresh index and add a document for every path of every
    scanned repository, committing once at the end.
    """
    if os.path.exists(IDX_LOCATION):
        log.debug('removing previous index')
        rmtree(IDX_LOCATION)

    if not os.path.exists(IDX_LOCATION):
        os.mkdir(IDX_LOCATION)

    writer = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME).writer()

    repositories = scan_paths(self.repo_location).values()
    for repo in repositories:
        log.debug('building index @ %s' % repo.path)
        for idx_path in self.get_paths(repo.path):
            self.add_doc(writer, idx_path, repo)

    writer.commit(merge=True)
    log.debug('>>> FINISHED BUILDING INDEX <<<')
141
140
142
141
def update_index(self):
    """
    Incrementally refresh the index.

    Indexed paths that no longer exist on disk are removed, paths whose
    filesystem mtime is newer than the stored ``modtime`` are re-indexed,
    and paths not yet in the index are added. One commit at the end.
    """
    # NOTE(review): add_doc stores the last changeset date as ``modtime``
    # while this method compares against the filesystem mtime and checks
    # existence on disk - confirm the two still agree for repository walks.
    log.debug('STARTING INCREMENTAL INDEXING UPDATE')

    idx = open_dir(IDX_LOCATION, indexname=self.indexname)
    reader = idx.reader()
    writer = idx.writer()

    known_paths = set()   # every path currently stored in the index
    stale_paths = set()   # stored paths that must be indexed again

    for stored in reader.all_stored_fields():
        doc_path = stored['path']
        known_paths.add(doc_path)

        if not os.path.exists(doc_path):
            # file vanished since it was indexed
            log.debug('removing from index %s' % doc_path)
            writer.delete_by_term('path', doc_path)
            continue

        indexed_at = stored['modtime']
        if os.path.getmtime(doc_path) > indexed_at:
            # changed since indexing: drop the old doc, queue a re-add
            log.debug('adding to reindex list %s' % doc_path)
            writer.delete_by_term('path', doc_path)
            stale_paths.add(doc_path)

    for repo in scan_paths(self.repo_location).values():
        for path in self.get_paths(repo.path):
            if path not in stale_paths and path in known_paths:
                # already indexed and unchanged
                continue
            self.add_doc(writer, path, repo)
            log.debug('reindexing %s' % path)

    writer.commit(merge=True)
    log.debug('>>> FINISHED <<<')
195
194
196 def run(self, full_index=False):
195 def run(self, full_index=False):
197 """Run daemon"""
196 """Run daemon"""
198 if full_index or self.initial:
197 if full_index or self.initial:
199 self.build_index()
198 self.build_index()
200 else:
199 else:
201 self.update_index()
200 self.update_index()
202
201
if __name__ == "__main__":
    # usage: <script> full|incremental <path to repositories>
    arg = sys.argv[1:]
    if len(arg) != 2:
        # a space was missing between the two concatenated literals
        sys.stderr.write('Please specify indexing type [full|incremental]'
                         ' and path to repositories as script args \n')
        sys.exit()

    if arg[0] == 'full':
        full_index = True
    elif arg[0] == 'incremental':
        # False means looking just for changes
        full_index = False
    else:
        sys.stdout.write('Please use [full|incremental]'
                         ' as script first arg \n')
        sys.exit()

    if not os.path.isdir(arg[1]):
        sys.stderr.write('%s is not a valid path \n' % arg[1])
        sys.exit()
    else:
        # repository location is handed over as a glob pattern
        if arg[1].endswith('/'):
            repo_location = arg[1] + '*'
        else:
            repo_location = arg[1] + '/*'

    try:
        l = DaemonLock()
        try:
            WhooshIndexingDaemon(repo_location=repo_location)\
                .run(full_index=full_index)
        finally:
            # release the lock even when indexing fails, so that a crash
            # does not leave the lock held for the next run
            l.release()
        reload(logging)
    except LockHeld:
        # another indexer instance holds the lock
        sys.exit(1)
238
237
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now