##// END OF EJS Templates
rewrote whoosh indexing to run internal repository.walk() instead of filesystem....
marcink -
r560:3072935b default
parent child Browse files
Show More
@@ -1,269 +1,270 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 # database management for hg app
4 4 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
5 5 #
6 6 # This program is free software; you can redistribute it and/or
7 7 # modify it under the terms of the GNU General Public License
8 8 # as published by the Free Software Foundation; version 2
9 9 # of the License or (at your option) any later version of the license.
10 10 #
11 11 # This program is distributed in the hope that it will be useful,
12 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 14 # GNU General Public License for more details.
15 15 #
16 16 # You should have received a copy of the GNU General Public License
17 17 # along with this program; if not, write to the Free Software
18 18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 19 # MA 02110-1301, USA.
20 20
21 21 """
22 22 Created on April 10, 2010
23 23 database management and creation for hg app
24 24 @author: marcink
25 25 """
26 26
27 27 from os.path import dirname as dn, join as jn
28 28 import os
29 29 import sys
30 30 import uuid
31 31
32 32 from rhodecode.lib.auth import get_crypt_password
33 33 from rhodecode.lib.utils import ask_ok
34 34 from rhodecode.model import init_model
35 35 from rhodecode.model.db import User, Permission, RhodeCodeUi, RhodeCodeSettings, \
36 36 UserToPerm
37 37 from rhodecode.model import meta
38 38 from sqlalchemy.engine import create_engine
39 39 import logging
40 40
41 41 log = logging.getLogger(__name__)
42 42
class DbManage(object):
    """
    Creates and populates the sqlite database of the application:
    tables, users, permissions and the default ui/settings rows.
    """

    def __init__(self, log_sql, dbname, root, tests=False):
        """
        :param log_sql: passed to ``create_engine`` as ``echo``
        :param dbname: file name of the sqlite database
        :param root: directory in which the database file is located
        :param tests: when True no interactive prompt is issued and
            test users/paths are used instead
        """
        self.dbname = dbname
        self.tests = tests
        self.root = root
        # NOTE(review): with an absolute ``root`` this yields five slashes
        # after 'sqlite:' - confirm this is the intended URL form.
        dburi = 'sqlite:////%s' % jn(self.root, self.dbname)
        engine = create_engine(dburi, echo=log_sql)
        init_model(engine)
        self.sa = meta.Session
        self.db_exists = False

    def _commit(self, *objects):
        """
        Add all given objects to the session and commit them in one
        transaction. On any failure the session is rolled back and the
        original exception is re-raised.
        """
        try:
            for obj in objects:
                self.sa.add(obj)
            self.sa.commit()
        except:
            # bare except on purpose: roll back whatever happened,
            # the exception itself is always propagated
            self.sa.rollback()
            raise

    def check_for_db(self, override):
        """
        Check if the database file already exists and remember the result
        in ``self.db_exists``.

        :param override: when False an existing database is an error
        :raises Exception: database exists and override is not allowed
        """
        db_path = jn(self.root, self.dbname)
        log.info('checking for existing db in %s', db_path)
        if not os.path.isfile(db_path):
            return
        self.db_exists = True
        log.info('database exist')
        if not override:
            raise Exception('database already exists')

    def create_tables(self, override=False):
        """
        Create a auth database

        :param override: when True an existing database file is removed
            first (after confirmation, unless running tests)
        """
        self.check_for_db(override)
        if override:
            log.info("database exist and it's going to be destroyed")
            if self.tests:
                destroy = True
            else:
                destroy = ask_ok('Are you sure to destroy old database ? [y/n]')
            if not destroy:
                # user changed his mind, nothing was touched
                sys.exit()
            if self.db_exists:
                os.remove(jn(self.root, self.dbname))
        checkfirst = not override
        meta.Base.metadata.create_all(checkfirst=checkfirst)
        log.info('Created tables for %s', self.dbname)

    def admin_prompt(self):
        """
        Create the administrator account. Asks for the credentials
        interactively, in test mode a fixed set of test users is created.
        """
        if self.tests:
            log.info('creating admin and regular test users')
            self.create_user('test_admin', 'test12', 'test_admin@mail.com', True)
            self.create_user('test_regular', 'test12', 'test_regular@mail.com', False)
            self.create_user('test_regular2', 'test12', 'test_regular2@mail.com', False)
            return

        import getpass
        username = raw_input('Specify admin username:')
        password = getpass.getpass('Specify admin password:')
        confirm = getpass.getpass('Confirm password:')
        if password != confirm:
            log.error('passwords mismatch')
            # this is a failure, do not report success to the shell
            sys.exit(1)
        email = raw_input('Specify admin email:')
        self.create_user(username, password, email, True)

    def config_prompt(self, test_repo_path=''):
        """
        Store the default ui configuration (hooks, web, paths) and the
        default application settings.

        :param test_repo_path: repositories location, when given (or when
            running tests) the user is not asked for it
        """
        log.info('Setting up repositories config')

        if not self.tests and not test_repo_path:
            path = raw_input('Specify valid full path to your repositories'
                             ' you can change this later in application settings:')
        else:
            path = test_repo_path

        if not os.path.isdir(path):
            log.error('You entered wrong path: %s', path)
            # this is a failure, do not report success to the shell
            sys.exit(1)

        # (section, key, value, active); active=None leaves ui_active unset
        # so whatever default the model defines is used
        ui_rows = [
            ('hooks', 'changegroup.update', 'hg update >&2', False),
            ('hooks', 'changegroup.repo_size',
             'python:rhodecode.lib.hooks.repo_size', None),
            ('web', 'push_ssl', 'false', None),
            ('web', 'allow_archive', 'gz zip bz2', None),
            ('web', 'allow_push', '*', None),
            ('web', 'baseurl', '/', None),
            ('paths', '/', os.path.join(path, '*'), None),
        ]
        app_settings = [
            ('realm', 'RhodeCode authentication'),
            ('title', 'RhodeCode'),
        ]

        new_objects = []
        for section, key, value, active in ui_rows:
            ui = RhodeCodeUi()
            ui.ui_section = section
            ui.ui_key = key
            ui.ui_value = value
            if active is not None:
                ui.ui_active = active
            new_objects.append(ui)

        for name, value in app_settings:
            setting = RhodeCodeSettings()
            setting.app_settings_name = name
            setting.app_settings_value = value
            new_objects.append(setting)

        self._commit(*new_objects)
        log.info('created ui config')

    def create_user(self, username, password, email='', admin=False):
        """
        Create an active user with an encrypted password.

        :param username: login of the new user
        :param password: plain text password, stored encrypted
        :param email: email address of the new user
        :param admin: flag deciding if user gets administrator rights
        """
        if admin:
            log.info('creating administrator user %s', username)
        else:
            log.info('creating regular user %s', username)
        new_user = User()
        new_user.username = username
        new_user.password = get_crypt_password(password)
        new_user.name = 'RhodeCode'
        new_user.lastname = 'Admin'
        new_user.email = email
        new_user.admin = admin
        new_user.active = True

        self._commit(new_user)

    def create_default_user(self):
        """
        Create the inactive ``default`` user that carries the default
        permissions.
        """
        log.info('creating default user')
        #create default user for handling default permissions.
        def_user = User()
        def_user.username = 'default'
        # throw-away password; uuid4 is random while uuid1 is built from
        # timestamp and host address
        def_user.password = get_crypt_password(str(uuid.uuid4())[:8])
        def_user.name = 'default'
        def_user.lastname = 'default'
        def_user.email = 'default@default.com'
        def_user.admin = False
        def_user.active = False

        self._commit(def_user)

    def create_permissions(self):
        """
        Store all known permissions, each in its own transaction.
        """
        #module.(access|create|change|delete)_[name]
        #module.(read|write|owner)
        perms = [('repository.none', 'Repository no access'),
                 ('repository.read', 'Repository read access'),
                 ('repository.write', 'Repository write access'),
                 ('repository.admin', 'Repository admin access'),
                 ('hg.admin', 'Hg Administrator'),
                 ('hg.create.repository', 'Repository create'),
                 ('hg.create.none', 'Repository creation disabled'),
                 ('hg.register.none', 'Register disabled'),
                 ('hg.register.manual_activate', 'Register new user with rhodecode with manual activation'),
                 ('hg.register.auto_activate', 'Register new user with rhodecode with auto activation'),
                 ]

        for name, longname in perms:
            new_perm = Permission()
            new_perm.permission_name = name
            new_perm.permission_longname = longname
            self._commit(new_perm)

    def populate_default_permissions(self):
        """
        Bind the default permissions (registration, repository creation
        and repository access) to the ``default`` user.
        """
        log.info('creating default user permissions')

        default_user = self.sa.query(User)\
            .filter(User.username == 'default').scalar()

        # order is kept: registration, repository creation, repository access
        default_perm_names = ('hg.register.manual_activate',
                              'hg.create.repository',
                              'repository.read')

        user_perms = []
        for perm_name in default_perm_names:
            user_perm = UserToPerm()
            user_perm.user = default_user
            user_perm.permission = self.sa.query(Permission)\
                .filter(Permission.permission_name == perm_name)\
                .scalar()
            user_perms.append(user_perm)

        self._commit(*user_perms)
269 270
@@ -1,238 +1,237 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 # whoosh indexer daemon for rhodecode
4 4 # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
5 5 #
6 6 # This program is free software; you can redistribute it and/or
7 7 # modify it under the terms of the GNU General Public License
8 8 # as published by the Free Software Foundation; version 2
9 9 # of the License or (at your option) any later version of the license.
10 10 #
11 11 # This program is distributed in the hope that it will be useful,
12 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 14 # GNU General Public License for more details.
15 15 #
16 16 # You should have received a copy of the GNU General Public License
17 17 # along with this program; if not, write to the Free Software
18 18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 19 # MA 02110-1301, USA.
20 20 """
21 21 Created on Jan 26, 2010
22 22
23 23 @author: marcink
24 24 A daemon will read from task table and run tasks
25 25 """
26 26 import sys
27 27 import os
28 28 from os.path import dirname as dn
29 29 from os.path import join as jn
30 30
31 31 #to get the rhodecode import
32 32 project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
33 33 sys.path.append(project_path)
34 34
35 35 from rhodecode.lib.pidlock import LockHeld, DaemonLock
36 36 from rhodecode.model.hg_model import HgModel
37 37 from rhodecode.lib.helpers import safe_unicode
38 38 from whoosh.index import create_in, open_dir
39 39 from shutil import rmtree
40 40 from rhodecode.lib.indexers import INDEX_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME
41 41
42 from time import mktime
43 from vcs.backends import hg
44
42 45 import logging
43 46
# dedicated logger of the indexer, it does not pass records to the root
# logger and prints everything from DEBUG up on the console
log = logging.getLogger('whooshIndexer')
log.setLevel(logging.DEBUG)
log.propagate = False

# create formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# getLogger() always returns the same object for this name, so when this
# code is executed again the existing console handler is reused instead
# of attaching another one (which would print every message twice)
if log.handlers:
    ch = log.handlers[0]
else:
    ch = logging.StreamHandler()
    log.addHandler(ch)

ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)
60 63
def scan_paths(root_location):
    """
    Run HgModel.repo_scan for the given location and return its result,
    the indexer iterates over ``.values()`` of it to get the repositories
    """
    found_repos = HgModel.repo_scan('/', root_location, None, True)
    return found_repos
63 66
class WhooshIndexingDaemon(object):
    """
    Daemon for atomic jobs
    """

    def __init__(self, indexname='HG_INDEX', repo_location=None):
        """
        :param indexname: name of the index opened by incremental updates
        :param repo_location: location passed to scan_paths() to find
            the repositories
        """
        self.indexname = indexname
        self.repo_location = repo_location
        self.initial = False
        if not os.path.isdir(IDX_LOCATION):
            os.mkdir(IDX_LOCATION)
            log.info('Cannot run incremental index since it does not'
                     ' yet exist running full build')
            # there is nothing to update yet, run() does a full build
            self.initial = True

    def get_paths(self, root_dir):
        """
        recursive walk in root dir and return a set of all path in that dir
        based on repository walk function

        :param root_dir: path of the repository
        :returns: set of file paths, each joined with root_dir
        """
        repo = hg.MercurialRepository(root_dir)
        index_paths_ = set()
        # every step of the walk yields the files of one directory, that
        # is all we need; directories themselves are not indexed
        for topnode, dirs, files in repo.walk('/', 'tip'):
            for f in files:
                index_paths_.add(jn(root_dir, f.path))

        return index_paths_

    def add_doc(self, writer, path, repo):
        """
        Adding doc to writer

        :param writer: index writer the document is added to
        :param path: path of the file as returned by get_paths()
        :param repo: repository the file belongs to
        """
        # strip the repository path (and the separator) to get the
        # path of the node inside of the repository
        n_path = path[len(repo.path) + 1:]
        node = repo.get_changeset().get_node(n_path)

        #we just index the content of chosen files
        if node.extension in INDEX_EXTENSIONS:
            log.debug('    >> %s [WITH CONTENT]' % path)
            u_content = node.content
        else:
            log.debug('    >> %s' % path)
            #just index file name without it's content
            u_content = u''

        writer.add_document(owner=unicode(repo.contact),
                            repository=safe_unicode(repo.name),
                            path=safe_unicode(path),
                            content=u_content,
                            modtime=mktime(node.last_changeset.date.timetuple()),
                            extension=node.extension)

    def build_index(self):
        """
        Drop the existing index and build a new one from all files of
        all repositories found in the repositories location.
        """
        if os.path.exists(IDX_LOCATION):
            log.debug('removing previous index')
            rmtree(IDX_LOCATION)

        if not os.path.exists(IDX_LOCATION):
            os.mkdir(IDX_LOCATION)

        # NOTE(review): the index is created as IDX_NAME but update_index()
        # opens self.indexname - confirm both are always the same name.
        idx = create_in(IDX_LOCATION, SCHEMA, indexname=IDX_NAME)
        writer = idx.writer()

        for repo in scan_paths(self.repo_location).values():
            log.debug('building index @ %s' % repo.path)

            for idx_path in self.get_paths(repo.path):
                self.add_doc(writer, idx_path, repo)

        # one commit for the whole build
        writer.commit(merge=True)

        log.debug('>>> FINISHED BUILDING INDEX <<<')

    def update_index(self):
        """
        Incremental update: documents of files that are gone are removed,
        changed files and files that are not in the index are (re)indexed.
        """
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')

        idx = open_dir(IDX_LOCATION, indexname=self.indexname)
        # The set of all paths in the index
        indexed_paths = set()
        # The set of all paths we need to re-index
        to_index = set()

        reader = idx.reader()
        writer = idx.writer()

        # NOTE(review): add_doc() stores the date of the last changeset as
        # modtime and takes paths from the repository walk, while the loop
        # below checks existence and mtime on the filesystem - confirm
        # these two still agree for files that are not checked out.

        # Loop over the stored fields in the index
        for fields in reader.all_stored_fields():
            indexed_path = fields['path']
            indexed_paths.add(indexed_path)

            if not os.path.exists(indexed_path):
                # This file was deleted since it was indexed
                log.debug('removing from index %s' % indexed_path)
                writer.delete_by_term('path', indexed_path)
                continue

            # Check if this file was changed since it
            # was indexed
            indexed_time = fields['modtime']
            mtime = os.path.getmtime(indexed_path)

            if mtime > indexed_time:
                # The file has changed, delete it and add it to the list of
                # files to reindex
                log.debug('adding to reindex list %s' % indexed_path)
                writer.delete_by_term('path', indexed_path)
                to_index.add(indexed_path)

        # Loop over the files of all repositories
        for repo in scan_paths(self.repo_location).values():
            for path in self.get_paths(repo.path):
                if path in to_index or path not in indexed_paths:
                    # This is either a file that's changed, or a new file
                    # that wasn't indexed before. So index it!
                    self.add_doc(writer, path, repo)
                    log.debug('reindexing %s' % path)

        writer.commit(merge=True)
        log.debug('>>> FINISHED <<<')

    def run(self, full_index=False):
        """
        Run daemon

        :param full_index: build the index from scratch, this is also
            done when the index location did not exist on creation
        """
        if full_index or self.initial:
            self.build_index()
        else:
            self.update_index()
202 201
if __name__ == "__main__":
    # usage: <script> full|incremental <path to repositories>
    arg = sys.argv[1:]
    if len(arg) != 2:
        sys.stderr.write('Please specify indexing type [full|incremental]'
                         ' and path to repositories as script args \n')
        # wrong usage is an error, exit with non zero status
        sys.exit(2)

    if arg[0] == 'full':
        full_index = True
    elif arg[0] == 'incremental':
        # False means looking just for changes
        full_index = False
    else:
        sys.stderr.write('Please use [full|incremental]'
                         ' as script first arg \n')
        sys.exit(2)

    if not os.path.isdir(arg[1]):
        sys.stderr.write('%s is not a valid path \n' % arg[1])
        sys.exit(2)

    # gives <path>/* no matter if the path was given with trailing slash
    repo_location = jn(arg[1], '*')

    try:
        lock = DaemonLock()
        try:
            WhooshIndexingDaemon(repo_location=repo_location)\
                .run(full_index=full_index)
        finally:
            # release the lock also when indexing failed, otherwise the
            # next run could not start
            lock.release()
        reload(logging)
    except LockHeld:
        # another indexer is running
        sys.exit(1)
238 237
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now