##// END OF EJS Templates
Rewrote whoosh indexing to use the internal repository.walk() instead of a filesystem walk...
marcink -
r560:3072935b default
parent child Browse files
Show More
@@ -115,6 +115,7 b' class DbManage(object):'
115 115 hooks1.ui_section = 'hooks'
116 116 hooks1.ui_key = 'changegroup.update'
117 117 hooks1.ui_value = 'hg update >&2'
118 hooks1.ui_active = False
118 119
119 120 hooks2 = RhodeCodeUi()
120 121 hooks2.ui_section = 'hooks'
@@ -39,6 +39,9 b' from whoosh.index import create_in, open'
39 39 from shutil import rmtree
40 40 from rhodecode.lib.indexers import INDEX_EXTENSIONS, IDX_LOCATION, SCHEMA, IDX_NAME
41 41
42 from time import mktime
43 from vcs.backends import hg
44
42 45 import logging
43 46
44 47 log = logging.getLogger('whooshIndexer')
@@ -62,7 +65,9 b' def scan_paths(root_location):'
62 65 return HgModel.repo_scan('/', root_location, None, True)
63 66
64 67 class WhooshIndexingDaemon(object):
65 """Deamon for atomic jobs"""
68 """
69 Daemon for atomic jobs
70 """
66 71
67 72 def __init__(self, indexname='HG_INDEX', repo_location=None):
68 73 self.indexname = indexname
@@ -73,55 +78,49 b' class WhooshIndexingDaemon(object):'
73 78 log.info('Cannot run incremental index since it does not'
74 79 ' yet exist running full build')
75 80 self.initial = True
76
81
77 82 def get_paths(self, root_dir):
78 """recursive walk in root dir and return a set of all path in that dir
79 excluding files in .hg dir"""
83 """
84 recursively walk the root dir and return a set of all paths in that dir
85 based on the repository walk function
86 """
87 repo = hg.MercurialRepository(root_dir)
80 88 index_paths_ = set()
81 for path, dirs, files in os.walk(root_dir):
82 if path.find('.hg') == -1:
89 for topnode, dirs, files in repo.walk('/', 'tip'):
90 for f in files:
91 index_paths_.add(jn(root_dir, f.path))
92 for dir in dirs:
83 93 for f in files:
84 index_paths_.add(jn(path, f))
85
86 return index_paths_
87
94 index_paths_.add(jn(root_dir, f.path))
95
96 return index_paths_
97
98
88 99 def add_doc(self, writer, path, repo):
89 100 """Adding doc to writer"""
90
91 ext = unicode(path.split('/')[-1].split('.')[-1].lower())
92 #we just index the content of choosen files
93 if ext in INDEX_EXTENSIONS:
101 n_path = path[len(repo.path) + 1:]
102 node = repo.get_changeset().get_node(n_path)
103
104 #we just index the content of chosen files
105 if node.extension in INDEX_EXTENSIONS:
94 106 log.debug(' >> %s [WITH CONTENT]' % path)
95 fobj = open(path, 'rb')
96 content = fobj.read()
97 fobj.close()
98 u_content = safe_unicode(content)
107 u_content = node.content
99 108 else:
100 109 log.debug(' >> %s' % path)
101 110 #just index file name without its content
102 111 u_content = u''
103 112
104
105
106 try:
107 os.stat(path)
108 writer.add_document(owner=unicode(repo.contact),
109 repository=safe_unicode(repo.name),
110 path=safe_unicode(path),
111 content=u_content,
112 modtime=os.path.getmtime(path),
113 extension=ext)
114 except OSError, e:
115 import errno
116 if e.errno == errno.ENOENT:
117 log.debug('path %s does not exist or is a broken symlink' % path)
118 else:
119 raise e
113 writer.add_document(owner=unicode(repo.contact),
114 repository=safe_unicode(repo.name),
115 path=safe_unicode(path),
116 content=u_content,
117 modtime=mktime(node.last_changeset.date.timetuple()),
118 extension=node.extension)
120 119
121 120
122 121 def build_index(self):
123 122 if os.path.exists(IDX_LOCATION):
124 log.debug('removing previos index')
123 log.debug('removing previous index')
125 124 rmtree(IDX_LOCATION)
126 125
127 126 if not os.path.exists(IDX_LOCATION):
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now